From c9aec69995ff959f05425d1610640da6f4797be7 Mon Sep 17 00:00:00 2001 From: Julien Acroute Date: Mon, 16 Sep 2019 09:09:02 +0200 Subject: [PATCH 01/18] Add attributes to keep track of uploaded files --- pghoard/pghoard.py | 62 +++++++++++++++++++++++++++++++--------------- 1 file changed, 42 insertions(+), 20 deletions(-) diff --git a/pghoard/pghoard.py b/pghoard/pghoard.py index 2eb1311b..51d4235d 100644 --- a/pghoard/pghoard.py +++ b/pghoard/pghoard.py @@ -70,6 +70,9 @@ def __init__(self, config_path): "startup_time": datetime.datetime.utcnow().isoformat(), } self.transfer_agent_state = {} # shared among transfer agents + # Keep track of remote xlog + self.remote_xlog = {} + self.remote_basebackup = {} self.load_config() if self.config["transfer"]["thread_count"] > 1: self.mp_manager = multiprocessing.Manager() @@ -114,7 +117,8 @@ def __init__(self, config_path): mp_manager=self.mp_manager, transfer_queue=self.transfer_queue, metrics=self.metrics, - shared_state_dict=self.transfer_agent_state) + shared_state_dict=self.transfer_agent_state, + remote_xlog=self.remote_xlog) self.transfer_agents.append(ta) logutil.notify_systemd("READY=1") @@ -271,15 +275,15 @@ def delete_remote_wal_before(self, wal_segment, site, pg_version): self.log.exception("Problem deleting: %r", wal_path) self.metrics.unexpected_exception(ex, where="delete_remote_wal_before") - def delete_remote_basebackup(self, site, basebackup, metadata): + def delete_remote_basebackup(self, site, basebackup): start_time = time.monotonic() storage = self.site_transfers.get(site) - main_backup_key = os.path.join(self.config["backup_sites"][site]["prefix"], "basebackup", basebackup) + main_backup_key = os.path.join(self.config["backup_sites"][site]["prefix"], "basebackup", basebackup["name"]) basebackup_data_files = [main_backup_key] - if metadata.get("format") == "pghoard-bb-v2": + if basebackup['metadata'].get("format") == "pghoard-bb-v2": bmeta_compressed = storage.get_contents_to_string(main_backup_key)[0] - with rohmufile.file_reader(fileobj=io.BytesIO(bmeta_compressed), metadata=metadata, + with rohmufile.file_reader(fileobj=io.BytesIO(bmeta_compressed), metadata=basebackup['metadata'], key_lookup=config.key_lookup_for_site(self.config, site)) as input_obj: bmeta = extract_pghoard_bb_v2_metadata(input_obj) self.log.debug("PGHoard chunk metadata: %r", bmeta) @@ -299,6 +303,7 @@ def delete_remote_basebackup(self, site, basebackup, metadata): except Exception as ex: # FIXME: don't catch all exceptions; pylint: disable=broad-except self.log.exception("Problem deleting: %r", obj_key) self.metrics.unexpected_exception(ex, where="delete_remote_basebackup") + self.remote_basebackup[site].remove(basebackup) self.log.info("Deleted basebackup datafiles: %r, took: %.2fs", ', '.join(basebackup_data_files), time.monotonic() - start_time) @@ -317,6 +322,17 @@ def get_remote_basebackups_info(self, site): results.sort(key=lambda entry: entry["metadata"]["start-time"]) return results + def get_remote_xlogs_info(self, site): + storage = self.site_transfers.get(site) + if not storage: + storage_config = get_object_storage_config(self.config, site) + storage = get_transfer(storage_config) + self.site_transfers[site] = storage + + site_config = self.config["backup_sites"][site] + results = storage.list_path(os.path.join(site_config["prefix"], "xlog"), with_metadata=False) + return [os.path.basename(x['name']) for x in results] + def patch_basebackup_info(self, *, entry, site_config): # drop path from resulting list and convert timestamps
entry["name"] = os.path.basename(entry["name"]) @@ -335,36 +351,43 @@ def patch_basebackup_info(self, *, entry, site_config): if "normalized-backup-time" not in metadata: metadata["normalized-backup-time"] = self.get_normalized_backup_time(site_config, now=metadata["start-time"]) - def determine_backups_to_delete(self, *, basebackups, site_config): + def determine_backups_to_delete(self, site): """Returns the basebackups in the given list that need to be deleted based on the given site configuration. Note that `basebackups` is edited in place: any basebackups that need to be deleted are removed from it.""" + site_config = self.config["backup_sites"][site] allowed_basebackup_count = site_config["basebackup_count"] if allowed_basebackup_count is None: - allowed_basebackup_count = len(basebackups) + allowed_basebackup_count = len(self.remote_basebackup[site]) basebackups_to_delete = [] - while len(basebackups) > allowed_basebackup_count: + while len(self.remote_basebackup[site]) > allowed_basebackup_count: self.log.warning("Too many basebackups: %d > %d, %r, starting to get rid of %r", - len(basebackups), allowed_basebackup_count, basebackups, basebackups[0]["name"]) - basebackups_to_delete.append(basebackups.pop(0)) + len(self.remote_basebackup[site]), + allowed_basebackup_count, + self.remote_basebackup[site], + self.remote_basebackup[site][0]["name"]) + basebackups_to_delete.append(self.remote_basebackup[site].pop(0)) backup_interval = datetime.timedelta(hours=site_config["basebackup_interval_hours"]) min_backups = site_config["basebackup_count_min"] max_age_days = site_config.get("basebackup_age_days_max") current_time = datetime.datetime.now(datetime.timezone.utc) if max_age_days and min_backups > 0: - while basebackups and len(basebackups) > min_backups: + while len(self.remote_basebackup[site]) > min_backups: # For age checks we treat the age as current_time - (backup_start_time + backup_interval). So when # backup interval is set to 24 hours a backup started 2.5 days ago would be considered to be 1.5 days old. - completed_at = basebackups[0]["metadata"]["start-time"] + backup_interval + completed_at = self.remote_basebackup[site][0]["metadata"]["start-time"] + backup_interval backup_age = current_time - completed_at # timedelta would have direct `days` attribute but that's an integer rounded down. We want a float # so that we can react immediately when age is too old backup_age_days = backup_age.total_seconds() / 60.0 / 60.0 / 24.0 if backup_age_days > max_age_days: self.log.warning("Basebackup %r too old: %.3f > %.3f, %r, starting to get rid of it", - basebackups[0]["name"], backup_age_days, max_age_days, basebackups) - basebackups_to_delete.append(basebackups.pop(0)) + self.remote_basebackup[site][0]["name"], + backup_age_days, + max_age_days, + self.remote_basebackup) + basebackups_to_delete.append(self.remote_basebackup[site].pop(0)) else: break @@ -373,25 +396,24 @@ def determine_backups_to_delete(self, *, basebackups, site_config): def refresh_backup_list_and_delete_old(self, site): """Look up basebackups from the object store, prune any extra backups and return the datetime of the latest backup.""" - basebackups = self.get_remote_basebackups_info(site) - self.log.debug("Found %r basebackups", basebackups) + self.log.debug("Found %r basebackups", self.remote_basebackup[site]) site_config = self.config["backup_sites"][site] # Never delete backups from a recovery site. 
This check is already elsewhere as well # but still check explicitly here to ensure we certainly won't delete anything unexpectedly if site_config["active"]: - basebackups_to_delete = self.determine_backups_to_delete(basebackups=basebackups, site_config=site_config) + basebackups_to_delete = self.determine_backups_to_delete(site) for basebackup_to_be_deleted in basebackups_to_delete: pg_version = basebackup_to_be_deleted["metadata"].get("pg-version") last_wal_segment_still_needed = 0 - if basebackups: - last_wal_segment_still_needed = basebackups[0]["metadata"]["start-wal-segment"] + if len(self.remote_basebackup[site]) > 0: + last_wal_segment_still_needed = self.remote_basebackup[site][0]["metadata"]["start-wal-segment"] if last_wal_segment_still_needed: self.delete_remote_wal_before(last_wal_segment_still_needed, site, pg_version) self.delete_remote_basebackup(site, basebackup_to_be_deleted["name"], basebackup_to_be_deleted["metadata"]) - self.state["backup_sites"][site]["basebackups"] = basebackups + self.state["backup_sites"][site]["basebackups"] = self.remote_basebackup[site] def get_normalized_backup_time(self, site_config, *, now=None): """Returns the closest historical backup time that current time matches to (or current time if it matches). From 53e7ea9390fd99f507e77bde0a63ee14c49dc8c1 Mon Sep 17 00:00:00 2001 From: Julien Acroute Date: Wed, 25 Sep 2019 14:42:48 +0200 Subject: [PATCH 02/18] Update remote metrics at the end of the transfer --- pghoard/pghoard.py | 2 +- pghoard/transfer.py | 14 +++++++++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/pghoard/pghoard.py b/pghoard/pghoard.py index 51d4235d..34ce1a9c 100644 --- a/pghoard/pghoard.py +++ b/pghoard/pghoard.py @@ -118,7 +118,7 @@ def __init__(self, config_path): transfer_queue=self.transfer_queue, metrics=self.metrics, shared_state_dict=self.transfer_agent_state, - remote_xlog=self.remote_xlog) + pghoard=self) self.transfer_agents.append(ta) logutil.notify_systemd("READY=1") diff --git a/pghoard/transfer.py b/pghoard/transfer.py index 7c06cd83..a311d0a8 100644 --- a/pghoard/transfer.py +++ b/pghoard/transfer.py @@ -24,7 +24,7 @@ class TransferAgent(Thread): def __init__(self, config, compression_queue, mp_manager, transfer_queue, metrics, - shared_state_dict): + shared_state_dict, pghoard): super().__init__() self.log = logging.getLogger("TransferAgent") self.config = config @@ -36,6 +36,7 @@ def __init__(self, config, compression_queue, mp_manager, transfer_queue, metric self.running = True self.sleep = time.sleep self.state = shared_state_dict + self.pghoard = pghoard self.site_transfers = {} self.log.debug("TransferAgent initialized") @@ -252,6 +253,17 @@ def handle_upload(self, site, key, file_to_transfer): except Exception as ex: # pylint: disable=broad-except self.log.exception("Problem in deleting file: %r", file_to_transfer["local_path"]) self.metrics.unexpected_exception(ex, where="handle_upload_unlink") + + # update metrics for remote xlog and base backup + if file_to_transfer.get('filetype') == 'xlog': + self.pghoard.remote_xlog[site].append(os.path.basename(key)) + elif file_to_transfer.get('filetype') == 'basebackup': + new_basebackup = list(storage.iter_key(key, include_key=True))[0].value + # patch metadata + self.pghoard.patch_basebackup_info(entry=new_basebackup, + site_config=self.pghoard.config["backup_sites"][site]) + self.pghoard.remote_basebackup[site].append(new_basebackup) + return {"success": True, "opaque": file_to_transfer.get("opaque")} except Exception as ex: # pylint: 
disable=broad-except if file_to_transfer.get("retry_number", 0) > 0: From ccdb6bfa2b5eb90095fa9be6a5c7f25b2c2401d8 Mon Sep 17 00:00:00 2001 From: Julien Acroute Date: Wed, 25 Sep 2019 18:28:02 +0200 Subject: [PATCH 03/18] Enable scan of remote storage in handle_site() and fix some tests --- pghoard/pghoard.py | 10 ++++++++-- test/base.py | 4 ++++ test/test_basebackup.py | 2 ++ test/test_transferagent.py | 9 ++++++++- 4 files changed, 22 insertions(+), 3 deletions(-) diff --git a/pghoard/pghoard.py b/pghoard/pghoard.py index 34ce1a9c..aea1c46a 100644 --- a/pghoard/pghoard.py +++ b/pghoard/pghoard.py @@ -366,7 +366,7 @@ def determine_backups_to_delete(self, site): allowed_basebackup_count, self.remote_basebackup[site], self.remote_basebackup[site][0]["name"]) - basebackups_to_delete.append(self.remote_basebackup[site].pop(0)) + basebackups_to_delete.append(self.remote_basebackup[site][0]) backup_interval = datetime.timedelta(hours=site_config["basebackup_interval_hours"]) min_backups = site_config["basebackup_count_min"] @@ -412,7 +412,7 @@ def refresh_backup_list_and_delete_old(self, site): if last_wal_segment_still_needed: self.delete_remote_wal_before(last_wal_segment_still_needed, site, pg_version) - self.delete_remote_basebackup(site, basebackup_to_be_deleted["name"], basebackup_to_be_deleted["metadata"]) + self.delete_remote_basebackup(site, basebackup_to_be_deleted) self.state["backup_sites"][site]["basebackups"] = self.remote_basebackup[site] def get_normalized_backup_time(self, site_config, *, now=None): @@ -530,6 +530,12 @@ def handle_site(self, site, site_config): if not site_config["active"]: return # If a site has been marked inactive, don't bother checking anything + if site not in self.remote_xlog or site not in self.remote_basebackup: + self.log.info("Retrieving info from remote storage for %s" % site) + self.remote_xlog[site] = self.get_remote_xlogs_info(site) + self.remote_basebackup[site] = self.get_remote_basebackups_info(site) + self.log.info("Remote info updated for %s" % site) + self._cleanup_inactive_receivexlogs(site) chosen_backup_node = random.choice(site_config["nodes"]) diff --git a/test/base.py b/test/base.py index 71f1f32c..25bca431 100644 --- a/test/base.py +++ b/test/base.py @@ -96,6 +96,10 @@ def config_template(self, override=None): def setup_method(self, method): self.temp_dir = mkdtemp(prefix=self.__class__.__name__) self.test_site = "site_{}".format(method.__name__) + self.remote_xlog = {} + self.remote_basebackup = {} + self.remote_xlog[self.test_site] = [] + self.remote_basebackup[self.test_site] = [] def teardown_method(self, method): # pylint: disable=unused-argument rmtree(self.temp_dir) diff --git a/test/test_basebackup.py b/test/test_basebackup.py index 02ce6f6f..270621af 100644 --- a/test/test_basebackup.py +++ b/test/test_basebackup.py @@ -193,6 +193,8 @@ def _test_create_basebackup(self, capsys, db, pghoard, mode, replica=False, acti pghoard.config["backup_sites"][pghoard.test_site]["basebackup_mode"] = mode pghoard.config["backup_sites"][pghoard.test_site]["active_backup_mode"] = active_backup_mode + pghoard.remote_xlog[pghoard.test_site] = [] + pghoard.remote_basebackup[pghoard.test_site] = [] now = datetime.datetime.now(datetime.timezone.utc) metadata = { diff --git a/test/test_transferagent.py b/test/test_transferagent.py index 695845f8..2345bfe7 100644 --- a/test/test_transferagent.py +++ b/test/test_transferagent.py @@ -32,6 +32,12 @@ def store_file_from_disk(self, key, local_path, metadata, multipart=None): # py raise StorageError("foo") 
+class MockPGHoard(Mock): + def __init__(self): + self.remote_xlog = {} + self.remote_basebackup = {} + + class TestTransferAgent(PGHoardTestCase): def setup_method(self, method): super().setup_method(method) @@ -64,7 +70,8 @@ def setup_method(self, method): mp_manager=None, transfer_queue=self.transfer_queue, metrics=metrics.Metrics(statsd={}), - shared_state_dict={}) + shared_state_dict={}, + pghoard=MockPGHoard()) self.transfer_agent.start() def teardown_method(self, method): From d79f7aec1563366291e15d0f2c326d032b072988 Mon Sep 17 00:00:00 2001 From: Julien Acroute Date: Wed, 25 Sep 2019 21:09:57 +0200 Subject: [PATCH 04/18] Fix lint issue --- pghoard/pghoard.py | 4 ++-- test/test_pghoard.py | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/pghoard/pghoard.py b/pghoard/pghoard.py index aea1c46a..9c4a4933 100644 --- a/pghoard/pghoard.py +++ b/pghoard/pghoard.py @@ -531,10 +531,10 @@ def handle_site(self, site, site_config): return # If a site has been marked inactive, don't bother checking anything if site not in self.remote_xlog or site not in self.remote_basebackup: - self.log.info("Retrieving info from remote storage for %s" % site) + self.log.info("Retrieving info from remote storage for %s", site) self.remote_xlog[site] = self.get_remote_xlogs_info(site) self.remote_basebackup[site] = self.get_remote_basebackups_info(site) - self.log.info("Remote info updated for %s" % site) + self.log.info("Remote info updated for %s", site) self._cleanup_inactive_receivexlogs(site) diff --git a/test/test_pghoard.py b/test/test_pghoard.py index 1d736f49..a3bde292 100644 --- a/test/test_pghoard.py +++ b/test/test_pghoard.py @@ -129,7 +129,7 @@ def test_determine_backups_to_delete(self): "basebackup_interval_hours": 24, } bbs_copy = list(bbs) - to_delete = self.pghoard.determine_backups_to_delete(basebackups=bbs_copy, site_config=site_config) + to_delete = self.pghoard.determine_backups_to_delete(self.test_site) assert len(bbs_copy) == 4 assert len(to_delete) == len(bbs) - len(bbs_copy) assert to_delete == bbs[:len(to_delete)] @@ -138,7 +138,7 @@ def test_determine_backups_to_delete(self): site_config["basebackup_count"] = 16 site_config["basebackup_age_days_max"] = 8 bbs_copy = list(bbs) - to_delete = self.pghoard.determine_backups_to_delete(basebackups=bbs_copy, site_config=site_config) + to_delete = self.pghoard.determine_backups_to_delete(self.test_site) # 3 of the backups are too old (start time + interval is over 8 days in the past) assert len(bbs_copy) == 10 assert len(to_delete) == len(bbs) - len(bbs_copy) @@ -147,7 +147,7 @@ def test_determine_backups_to_delete(self): site_config["basebackup_count"] = 9 bbs_copy = list(bbs) - to_delete = self.pghoard.determine_backups_to_delete(basebackups=bbs_copy, site_config=site_config) + to_delete = self.pghoard.determine_backups_to_delete(self.test_site) # basebackup_count trumps backup age and backups are removed even though they're not too old assert len(bbs_copy) == 9 assert len(to_delete) == len(bbs) - len(bbs_copy) @@ -158,7 +158,7 @@ def test_determine_backups_to_delete(self): site_config["basebackup_age_days_max"] = 2 site_config["basebackup_count_min"] = 6 bbs_copy = list(bbs) - to_delete = self.pghoard.determine_backups_to_delete(basebackups=bbs_copy, site_config=site_config) + to_delete = self.pghoard.determine_backups_to_delete(self.test_site) # basebackup_count_min ensures not that many backups are removed even though they're too old assert len(bbs_copy) == 6 assert len(to_delete) == len(bbs) - len(bbs_copy) @@ 
-167,7 +167,7 @@ def test_determine_backups_to_delete(self): site_config["basebackup_count_min"] = 2 bbs_copy = list(bbs) - to_delete = self.pghoard.determine_backups_to_delete(basebackups=bbs_copy, site_config=site_config) + to_delete = self.pghoard.determine_backups_to_delete(self.test_site) # 3 of the backups are new enough (start time less than 3 days in the past) assert len(bbs_copy) == 3 assert len(to_delete) == len(bbs) - len(bbs_copy) From 6cb6450912c5330de8468014ecf9aec23d5a3f2e Mon Sep 17 00:00:00 2001 From: Julien Acroute Date: Wed, 25 Sep 2019 22:20:32 +0200 Subject: [PATCH 05/18] Fix TransferAgent tests --- test/base.py | 11 +++++++-- test/test_transferagent.py | 47 +++++++++++++++++++++++++++++++------- 2 files changed, 48 insertions(+), 10 deletions(-) diff --git a/test/base.py b/test/base.py index 25bca431..28e63e4b 100644 --- a/test/base.py +++ b/test/base.py @@ -6,7 +6,7 @@ """ # pylint: disable=attribute-defined-outside-init from pghoard.config import find_pg_binary, set_and_check_config_defaults -from pghoard.rohmu import compat +from pghoard.rohmu import compat, dates from shutil import rmtree from tempfile import mkdtemp import logging @@ -101,5 +101,12 @@ def setup_method(self, method): self.remote_xlog[self.test_site] = [] self.remote_basebackup[self.test_site] = [] - def teardown_method(self, method): # pylint: disable=unused-argument + def teardown_method(self, method): rmtree(self.temp_dir) + + def patch_basebackup_info(self, *, entry, site_config): # pylint: disable=unused-argument + # drop path from resulting list and convert timestamps + entry["name"] = os.path.basename(entry["name"]) + metadata = entry["metadata"] + metadata["start-time"] = dates.parse_timestamp(metadata["start-time"]) + diff --git a/test/test_transferagent.py b/test/test_transferagent.py index 2345bfe7..9e591a9a 100644 --- a/test/test_transferagent.py +++ b/test/test_transferagent.py @@ -5,6 +5,13 @@ See LICENSE for details """ # pylint: disable=attribute-defined-outside-init +import datetime +import hashlib + +import dateutil + +from pghoard.rohmu.object_storage.base import IterKeyItem, KEY_TYPE_OBJECT + from .base import PGHoardTestCase from pghoard import metrics from pghoard.rohmu.errors import FileNotFoundFromStorageError, StorageError @@ -17,11 +24,41 @@ class MockStorage(Mock): + + def init(self): + if self.init_ok != True: + self.objects = {} + now = datetime.datetime.now(dateutil.tz.tzlocal()) + self.sample_storage_date = "{} {}".format(now.isoformat().split("+", 1)[0], now.tzname()) + self.init_ok = True + + + def setup_method(self, method): + super().setup_method(method) + self.init() + def get_contents_to_string(self, key): # pylint: disable=unused-argument + self.init() return b"joo", {"key": "value"} def store_file_from_disk(self, key, local_path, metadata, multipart=None): # pylint: disable=unused-argument - pass + self.init() + self.objects[key] = local_path + + def iter_key(self, key, *, with_metadata=True, deep=False, include_key=False): + self.init() + for item in self.objects.keys(): + if key == item[0:len(key)]: + yield IterKeyItem( + type=KEY_TYPE_OBJECT, + value={ + "last_modified": self.sample_storage_date, + "md5": hashlib.md5(item.encode()).hexdigest(), + "metadata": {"start-time": self.sample_storage_date}, + "name": item, + "size": len(self.objects[item]), + }, + ) class MockStorageRaising(Mock): @@ -32,12 +69,6 @@ def store_file_from_disk(self, key, local_path, metadata, multipart=None): # py raise StorageError("foo") -class MockPGHoard(Mock): - def 
__init__(self): - self.remote_xlog = {} - self.remote_basebackup = {} - - class TestTransferAgent(PGHoardTestCase): def setup_method(self, method): super().setup_method(method) @@ -71,7 +102,7 @@ def setup_method(self, method): transfer_queue=self.transfer_queue, metrics=metrics.Metrics(statsd={}), shared_state_dict={}, - pghoard=MockPGHoard()) + pghoard=self) self.transfer_agent.start() def teardown_method(self, method): From 3bebb3e001a09cae6b3fb4a037af0dd4774acbdd Mon Sep 17 00:00:00 2001 From: Julien Acroute Date: Thu, 26 Sep 2019 23:00:59 +0200 Subject: [PATCH 06/18] Fix deletion of basebackup determine_backups_to_delete() should not modify self.remote_basebackup --- pghoard/pghoard.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pghoard/pghoard.py b/pghoard/pghoard.py index 9c4a4933..ca3a2c74 100644 --- a/pghoard/pghoard.py +++ b/pghoard/pghoard.py @@ -360,13 +360,15 @@ def determine_backups_to_delete(self, site): allowed_basebackup_count = len(self.remote_basebackup[site]) basebackups_to_delete = [] - while len(self.remote_basebackup[site]) > allowed_basebackup_count: + for basebackup in self.remote_basebackup[site]: + if (len(self.remote_basebackup[site]) - len(basebackups_to_delete)) <= allowed_basebackup_count: + break; self.log.warning("Too many basebackups: %d > %d, %r, starting to get rid of %r", len(self.remote_basebackup[site]), allowed_basebackup_count, self.remote_basebackup[site], self.remote_basebackup[site][0]["name"]) - basebackups_to_delete.append(self.remote_basebackup[site][0]) + basebackups_to_delete.append(basebackup) backup_interval = datetime.timedelta(hours=site_config["basebackup_interval_hours"]) min_backups = site_config["basebackup_count_min"] From c404b44e2d6630018837a96d3ce5d46a643951dc Mon Sep 17 00:00:00 2001 From: Julien Acroute Date: Thu, 26 Sep 2019 23:02:58 +0200 Subject: [PATCH 07/18] Fix unit test --- test/test_pghoard.py | 46 ++++++++++++++++++++------------------------ 1 file changed, 21 insertions(+), 25 deletions(-) diff --git a/test/test_pghoard.py b/test/test_pghoard.py index a3bde292..b971ff39 100644 --- a/test/test_pghoard.py +++ b/test/test_pghoard.py @@ -109,50 +109,46 @@ def test_determine_backups_to_delete(self): now = datetime.datetime.now(datetime.timezone.utc) bbs = [ {"name": "bb1", "metadata": {"start-time": now - datetime.timedelta(days=10, hours=4)}}, - {"name": "bb1", "metadata": {"start-time": now - datetime.timedelta(days=9, hours=4)}}, - {"name": "bb1", "metadata": {"start-time": now - datetime.timedelta(days=9, hours=1)}}, - {"name": "bb1", "metadata": {"start-time": now - datetime.timedelta(days=8, hours=4)}}, - {"name": "bb1", "metadata": {"start-time": now - datetime.timedelta(days=7, hours=4)}}, - {"name": "bb1", "metadata": {"start-time": now - datetime.timedelta(days=6, hours=4)}}, - {"name": "bb1", "metadata": {"start-time": now - datetime.timedelta(days=6, hours=20)}}, - {"name": "bb1", "metadata": {"start-time": now - datetime.timedelta(days=5, hours=4)}}, - {"name": "bb1", "metadata": {"start-time": now - datetime.timedelta(days=4, hours=4)}}, - {"name": "bb1", "metadata": {"start-time": now - datetime.timedelta(days=3, hours=4)}}, - {"name": "bb1", "metadata": {"start-time": now - datetime.timedelta(days=2, hours=4)}}, - {"name": "bb1", "metadata": {"start-time": now - datetime.timedelta(days=1, hours=4)}}, - {"name": "bb1", "metadata": {"start-time": now - datetime.timedelta(hours=4)}}, + {"name": "bb2", "metadata": {"start-time": now - datetime.timedelta(days=9, hours=4)}}, + {"name": 
"bb3", "metadata": {"start-time": now - datetime.timedelta(days=9, hours=1)}}, + {"name": "bb4", "metadata": {"start-time": now - datetime.timedelta(days=8, hours=4)}}, + {"name": "bb5", "metadata": {"start-time": now - datetime.timedelta(days=7, hours=4)}}, + {"name": "bb6", "metadata": {"start-time": now - datetime.timedelta(days=6, hours=4)}}, + {"name": "bb7", "metadata": {"start-time": now - datetime.timedelta(days=6, hours=20)}}, + {"name": "bb8", "metadata": {"start-time": now - datetime.timedelta(days=5, hours=4)}}, + {"name": "bb9", "metadata": {"start-time": now - datetime.timedelta(days=4, hours=4)}}, + {"name": "bb10", "metadata": {"start-time": now - datetime.timedelta(days=3, hours=4)}}, + {"name": "bb11", "metadata": {"start-time": now - datetime.timedelta(days=2, hours=4)}}, + {"name": "bb12", "metadata": {"start-time": now - datetime.timedelta(days=1, hours=4)}}, + {"name": "bb13", "metadata": {"start-time": now - datetime.timedelta(hours=4)}}, ] + basebackup_count = 4 site_config = { - "basebackup_count": 4, + "basebackup_count": basebackup_count, "basebackup_count_min": 2, "basebackup_interval_hours": 24, } - bbs_copy = list(bbs) + self.pghoard.config["backup_sites"][self.test_site] = site_config + self.pghoard.remote_basebackup[self.test_site] = bbs to_delete = self.pghoard.determine_backups_to_delete(self.test_site) - assert len(bbs_copy) == 4 - assert len(to_delete) == len(bbs) - len(bbs_copy) + a = len(to_delete) + assert len(bbs) - len(to_delete) == 4 assert to_delete == bbs[:len(to_delete)] - assert bbs_copy == bbs[len(to_delete):] site_config["basebackup_count"] = 16 site_config["basebackup_age_days_max"] = 8 - bbs_copy = list(bbs) to_delete = self.pghoard.determine_backups_to_delete(self.test_site) # 3 of the backups are too old (start time + interval is over 8 days in the past) - assert len(bbs_copy) == 10 - assert len(to_delete) == len(bbs) - len(bbs_copy) + assert len(to_delete) == 3 assert to_delete == bbs[:len(to_delete)] - assert bbs_copy == bbs[len(to_delete):] site_config["basebackup_count"] = 9 - bbs_copy = list(bbs) to_delete = self.pghoard.determine_backups_to_delete(self.test_site) # basebackup_count trumps backup age and backups are removed even though they're not too old - assert len(bbs_copy) == 9 - assert len(to_delete) == len(bbs) - len(bbs_copy) + a = len(to_delete) + assert len(to_delete) == 9 assert to_delete == bbs[:len(to_delete)] - assert bbs_copy == bbs[len(to_delete):] site_config["basebackup_count"] = 16 site_config["basebackup_age_days_max"] = 2 From f5b26fe2af7bff571bbc1576dfb9a09f9ac1fffc Mon Sep 17 00:00:00 2001 From: Julien Acroute Date: Fri, 27 Sep 2019 00:31:14 +0200 Subject: [PATCH 08/18] In pghoard sync state with remote_basebackup --- pghoard/pghoard.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pghoard/pghoard.py b/pghoard/pghoard.py index ca3a2c74..96e8ccc3 100644 --- a/pghoard/pghoard.py +++ b/pghoard/pghoard.py @@ -415,7 +415,6 @@ def refresh_backup_list_and_delete_old(self, site): if last_wal_segment_still_needed: self.delete_remote_wal_before(last_wal_segment_still_needed, site, pg_version) self.delete_remote_basebackup(site, basebackup_to_be_deleted) - self.state["backup_sites"][site]["basebackups"] = self.remote_basebackup[site] def get_normalized_backup_time(self, site_config, *, now=None): """Returns the closest historical backup time that current time matches to (or current time if it matches). 
@@ -536,6 +535,7 @@ def handle_site(self, site, site_config): self.log.info("Retrieving info from remote storage for %s", site) self.remote_xlog[site] = self.get_remote_xlogs_info(site) self.remote_basebackup[site] = self.get_remote_basebackups_info(site) + self.state["backup_sites"][site]["basebackups"] = self.remote_basebackup[site] self.log.info("Remote info updated for %s", site) self._cleanup_inactive_receivexlogs(site) @@ -587,7 +587,7 @@ def get_new_backup_details(self, *, now=None, site, site_config): be created at this time""" if not now: now = datetime.datetime.now(datetime.timezone.utc) - basebackups = self.state["backup_sites"][site]["basebackups"] + basebackups = self.remote_basebackup[site] backup_hour = site_config.get("basebackup_hour") backup_minute = site_config.get("basebackup_minute") backup_reason = None From 2842b5bc64ab3b40770ad09c911e43b7134ff638 Mon Sep 17 00:00:00 2001 From: Julien Acroute Date: Fri, 27 Sep 2019 00:35:57 +0200 Subject: [PATCH 09/18] Fix unit tests Use remote_basebackup because it holds the same value as the state. We also need to copy the list because it remains the same object across calls; the previous code replaced it with a new list. --- test/test_basebackup.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/test/test_basebackup.py b/test/test_basebackup.py index 270621af..80ebb2f0 100644 --- a/test/test_basebackup.py +++ b/test/test_basebackup.py @@ -536,6 +536,7 @@ def test_handle_site(self, pghoard): site_config = deepcopy(pghoard.config["backup_sites"][pghoard.test_site]) site_config["basebackup_interval_hours"] = 1 / 3600 assert pghoard.basebackups == {} + assert pghoard.test_site not in pghoard.remote_basebackup # initialize with a single backup backup_start = time.monotonic() @@ -559,7 +560,7 @@ def test_handle_site(self, pghoard): # now call handle_site so it notices the backup has finished (this must not start a new one) pghoard.handle_site(pghoard.test_site, site_config) assert pghoard.test_site not in pghoard.basebackups - first_basebackups = pghoard.state["backup_sites"][pghoard.test_site]["basebackups"] + first_basebackups = pghoard.remote_basebackup[pghoard.test_site][:] assert first_basebackups[0]["metadata"]["backup-reason"] == "scheduled" assert first_basebackups[0]["metadata"]["backup-decision-time"] assert first_basebackups[0]["metadata"]["normalized-backup-time"] is None @@ -572,7 +573,7 @@ def test_handle_site(self, pghoard): pghoard.handle_site(pghoard.test_site, site_config) assert pghoard.test_site not in pghoard.basebackups - second_basebackups = pghoard.state["backup_sites"][pghoard.test_site]["basebackups"] + second_basebackups = pghoard.remote_basebackup[pghoard.test_site][:] second_time_of_check = pghoard.time_of_last_backup_check[pghoard.test_site] assert second_basebackups == first_basebackups assert second_time_of_check > first_time_of_check @@ -586,7 +587,7 @@ def test_handle_site(self, pghoard): pghoard.handle_site(pghoard.test_site, site_config) assert pghoard.test_site not in pghoard.basebackups - third_basebackups = pghoard.state["backup_sites"][pghoard.test_site]["basebackups"] + third_basebackups = pghoard.remote_basebackup[pghoard.test_site][:] third_time_of_check = pghoard.time_of_last_backup_check[pghoard.test_site] assert third_basebackups != second_basebackups assert third_time_of_check > second_time_of_check @@ -596,7 +597,7 @@ def test_handle_site(self, pghoard): pghoard.handle_site(pghoard.test_site, site_config) assert pghoard.test_site not in pghoard.basebackups -
fourth_basebackups = pghoard.state["backup_sites"][pghoard.test_site]["basebackups"] + fourth_basebackups = pghoard.remote_basebackup[pghoard.test_site][:] fourth_time_of_check = pghoard.time_of_last_backup_check[pghoard.test_site] assert fourth_basebackups == third_basebackups assert fourth_time_of_check == third_time_of_check From 09015cf69ead07421c49496086e6176ea2436b79 Mon Sep 17 00:00:00 2001 From: Julien Acroute Date: Mon, 30 Sep 2019 11:09:14 +0200 Subject: [PATCH 10/18] Fix lint and last test of test_basebackup --- pghoard/pghoard.py | 2 +- test/base.py | 1 - test/test_basebackup.py | 3 +++ test/test_pghoard.py | 2 -- test/test_transferagent.py | 3 +-- 5 files changed, 5 insertions(+), 6 deletions(-) diff --git a/pghoard/pghoard.py b/pghoard/pghoard.py index 96e8ccc3..e6921cde 100644 --- a/pghoard/pghoard.py +++ b/pghoard/pghoard.py @@ -362,7 +362,7 @@ def determine_backups_to_delete(self, site): basebackups_to_delete = [] for basebackup in self.remote_basebackup[site]: if (len(self.remote_basebackup[site]) - len(basebackups_to_delete)) <= allowed_basebackup_count: - break; + break self.log.warning("Too many basebackups: %d > %d, %r, starting to get rid of %r", len(self.remote_basebackup[site]), allowed_basebackup_count, diff --git a/test/base.py b/test/base.py index 28e63e4b..bd66decd 100644 --- a/test/base.py +++ b/test/base.py @@ -109,4 +109,3 @@ def patch_basebackup_info(self, *, entry, site_config): # pylint: disable=unuse entry["name"] = os.path.basename(entry["name"]) metadata = entry["metadata"] metadata["start-time"] = dates.parse_timestamp(metadata["start-time"]) - diff --git a/test/test_basebackup.py b/test/test_basebackup.py index 80ebb2f0..b2f75897 100644 --- a/test/test_basebackup.py +++ b/test/test_basebackup.py @@ -609,6 +609,9 @@ def test_get_new_backup_details(self, pghoard): site = pghoard.test_site pghoard.set_state_defaults(site) + pghoard.remote_xlog[site] = pghoard.get_remote_xlogs_info(site) + pghoard.remote_basebackup[site] = pghoard.get_remote_basebackups_info(site) + pghoard.state["backup_sites"][site]["basebackups"] = pghoard.remote_basebackup[site] site_config = pghoard.config["backup_sites"][site] # No backups, one should be created. 
No backup schedule defined so normalized backup time is None diff --git a/test/test_pghoard.py index b971ff39..919cb0a3 100644 --- a/test/test_pghoard.py +++ b/test/test_pghoard.py @@ -132,7 +132,6 @@ def test_determine_backups_to_delete(self): self.pghoard.config["backup_sites"][self.test_site] = site_config self.pghoard.remote_basebackup[self.test_site] = bbs to_delete = self.pghoard.determine_backups_to_delete(self.test_site) - a = len(to_delete) assert len(bbs) - len(to_delete) == 4 assert to_delete == bbs[:len(to_delete)] @@ -146,7 +145,6 @@ def test_determine_backups_to_delete(self): site_config["basebackup_count"] = 9 to_delete = self.pghoard.determine_backups_to_delete(self.test_site) # basebackup_count trumps backup age and backups are removed even though they're not too old - a = len(to_delete) assert len(to_delete) == 9 assert to_delete == bbs[:len(to_delete)] diff --git a/test/test_transferagent.py b/test/test_transferagent.py index 9e591a9a..d339ac92 100644 --- a/test/test_transferagent.py +++ b/test/test_transferagent.py @@ -26,13 +26,12 @@ class MockStorage(Mock): def init(self): - if self.init_ok != True: + if self.init_ok is not True: self.objects = {} now = datetime.datetime.now(dateutil.tz.tzlocal()) self.sample_storage_date = "{} {}".format(now.isoformat().split("+", 1)[0], now.tzname()) self.init_ok = True - def setup_method(self, method): super().setup_method(method) self.init() From a8977b0e4d2de58b905b51039dec71761e40da69 Mon Sep 17 00:00:00 2001 From: Julien Acroute Date: Mon, 30 Sep 2019 15:34:28 +0200 Subject: [PATCH 11/18] Fix determine_backups_to_delete() and unit tests --- pghoard/pghoard.py | 19 ++++++++++++------- test/test_pghoard.py | 22 +++++++++++----------- 2 files changed, 23 insertions(+), 18 deletions(-) diff --git a/pghoard/pghoard.py b/pghoard/pghoard.py index e6921cde..086fdf18 100644 --- a/pghoard/pghoard.py +++ b/pghoard/pghoard.py @@ -360,36 +360,41 @@ def determine_backups_to_delete(self, site): allowed_basebackup_count = len(self.remote_basebackup[site]) basebackups_to_delete = [] - for basebackup in self.remote_basebackup[site]: - if (len(self.remote_basebackup[site]) - len(basebackups_to_delete)) <= allowed_basebackup_count: + remote_basebackups = self.remote_basebackup[site][:] + for basebackup in remote_basebackups: + if (len(remote_basebackups) - len(basebackups_to_delete)) <= allowed_basebackup_count: break self.log.warning("Too many basebackups: %d > %d, %r, starting to get rid of %r", len(self.remote_basebackup[site]), allowed_basebackup_count, self.remote_basebackup[site], - self.remote_basebackup[site][0]["name"]) + basebackup["name"]) basebackups_to_delete.append(basebackup) + for basebackup in basebackups_to_delete: + remote_basebackups.remove(basebackup) backup_interval = datetime.timedelta(hours=site_config["basebackup_interval_hours"]) min_backups = site_config["basebackup_count_min"] max_age_days = site_config.get("basebackup_age_days_max") current_time = datetime.datetime.now(datetime.timezone.utc) if max_age_days and min_backups > 0: - while len(self.remote_basebackup[site]) > min_backups: + for basebackup in remote_basebackups: + if (len(remote_basebackups) - len(basebackups_to_delete)) <= min_backups: + break # For age checks we treat the age as current_time - (backup_start_time + backup_interval). So when # backup interval is set to 24 hours a backup started 2.5 days ago would be considered to be 1.5 days old.
- completed_at = self.remote_basebackup[site][0]["metadata"]["start-time"] + backup_interval + completed_at = basebackup["metadata"]["start-time"] + backup_interval backup_age = current_time - completed_at # timedelta would have direct `days` attribute but that's an integer rounded down. We want a float # so that we can react immediately when age is too old backup_age_days = backup_age.total_seconds() / 60.0 / 60.0 / 24.0 if backup_age_days > max_age_days: self.log.warning("Basebackup %r too old: %.3f > %.3f, %r, starting to get rid of it", - self.remote_basebackup[site][0]["name"], + basebackup["name"], backup_age_days, max_age_days, self.remote_basebackup) - basebackups_to_delete.append(self.remote_basebackup[site].pop(0)) + basebackups_to_delete.append(basebackup) else: break diff --git a/test/test_pghoard.py b/test/test_pghoard.py index 919cb0a3..2116e9d4 100644 --- a/test/test_pghoard.py +++ b/test/test_pghoard.py @@ -132,7 +132,8 @@ def test_determine_backups_to_delete(self): self.pghoard.config["backup_sites"][self.test_site] = site_config self.pghoard.remote_basebackup[self.test_site] = bbs to_delete = self.pghoard.determine_backups_to_delete(self.test_site) - assert len(bbs) - len(to_delete) == 4 + assert len(bbs) - len(to_delete) == basebackup_count + # check that pghoard delete oldest basebackups (first items of the list) assert to_delete == bbs[:len(to_delete)] site_config["basebackup_count"] = 16 @@ -143,30 +144,29 @@ def test_determine_backups_to_delete(self): assert to_delete == bbs[:len(to_delete)] site_config["basebackup_count"] = 9 + site_config["basebackup_age_days_max"] = 10 to_delete = self.pghoard.determine_backups_to_delete(self.test_site) # basebackup_count trumps backup age and backups are removed even though they're not too old - assert len(to_delete) == 9 + # We have 13 basebackups, 12 with start-time < 8 days + # So based with basebackup_count = 9 pghoard should delete 4 backups (bb1, bb2, bb3, bb4) + # And with basebackup_age_days_max = 10 days pghoard should delete 1 backup (bb1) + assert len(to_delete) == 4 assert to_delete == bbs[:len(to_delete)] + basebackup_count_min = 6 site_config["basebackup_count"] = 16 site_config["basebackup_age_days_max"] = 2 - site_config["basebackup_count_min"] = 6 - bbs_copy = list(bbs) + site_config["basebackup_count_min"] = basebackup_count_min to_delete = self.pghoard.determine_backups_to_delete(self.test_site) # basebackup_count_min ensures not that many backups are removed even though they're too old - assert len(bbs_copy) == 6 - assert len(to_delete) == len(bbs) - len(bbs_copy) + assert len(to_delete) == len(self.pghoard.remote_basebackup[self.test_site]) - basebackup_count_min assert to_delete == bbs[:len(to_delete)] - assert bbs_copy == bbs[len(to_delete):] site_config["basebackup_count_min"] = 2 - bbs_copy = list(bbs) to_delete = self.pghoard.determine_backups_to_delete(self.test_site) # 3 of the backups are new enough (start time less than 3 days in the past) - assert len(bbs_copy) == 3 - assert len(to_delete) == len(bbs) - len(bbs_copy) + assert len(to_delete) == len(self.pghoard.remote_basebackup[self.test_site]) - 3 assert to_delete == bbs[:len(to_delete)] - assert bbs_copy == bbs[len(to_delete):] def test_local_refresh_backup_list_and_delete_old(self): basebackup_storage_path = os.path.join(self.local_storage_dir, "basebackup") From 3d5f99ae05eb33210bd8df416828a72491c0645f Mon Sep 17 00:00:00 2001 From: Julien Acroute Date: Wed, 2 Oct 2019 11:01:57 +0200 Subject: [PATCH 12/18] Remove xlog from pghoard.remote_xlog 
Simplify deletion of remote WAL segments --- pghoard/pghoard.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/pghoard/pghoard.py b/pghoard/pghoard.py index 086fdf18..f6ca9a8c 100644 --- a/pghoard/pghoard.py +++ b/pghoard/pghoard.py @@ -258,6 +258,7 @@ def delete_remote_wal_before(self, wal_segment, site, pg_version): self.log.debug("Deleting wal_file: %r", wal_path) try: storage.delete_key(wal_path) + self.remote_xlog[site].remove(wal.name_for_tli_log_seg(tli, log, seg)) valid_timeline = True except FileNotFoundFromStorageError: if not valid_timeline or tli <= 1: @@ -410,17 +411,14 @@ def refresh_backup_list_and_delete_old(self, site): # but still check explicitly here to ensure we certainly won't delete anything unexpectedly if site_config["active"]: basebackups_to_delete = self.determine_backups_to_delete(site) - for basebackup_to_be_deleted in basebackups_to_delete: pg_version = basebackup_to_be_deleted["metadata"].get("pg-version") last_wal_segment_still_needed = 0 - if len(self.remote_basebackup[site]) > 0: - last_wal_segment_still_needed = self.remote_basebackup[site][0]["metadata"]["start-wal-segment"] - if last_wal_segment_still_needed: - self.delete_remote_wal_before(last_wal_segment_still_needed, site, pg_version) self.delete_remote_basebackup(site, basebackup_to_be_deleted) + if len(basebackups_to_delete) > 0 and len(self.remote_basebackup[site]) > 0: + pg_version = basebackups_to_delete[0]["metadata"].get("pg-version") + last_wal_segment_still_needed = self.remote_basebackup[site][0]["metadata"]["start-wal-segment"] + self.delete_remote_wal_before(last_wal_segment_still_needed, site, pg_version) + def get_normalized_backup_time(self, site_config, *, now=None): """Returns the closest historical backup time that current time matches to (or current time if it matches). E.g.
if backup hour is 13, backup minute is 50, current time is 15:40 and backup interval is 60 minutes, From 6a0f0c99d5ca6057a6c8dd1fe0ecfec2c9237e05 Mon Sep 17 00:00:00 2001 From: Julien Acroute Date: Wed, 2 Oct 2019 11:04:18 +0200 Subject: [PATCH 13/18] Fix unit tests --- test/test_pghoard.py | 45 +++++++++++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 15 deletions(-) diff --git a/test/test_pghoard.py b/test/test_pghoard.py index 2116e9d4..a75e42af 100644 --- a/test/test_pghoard.py +++ b/test/test_pghoard.py @@ -175,7 +175,10 @@ def test_local_refresh_backup_list_and_delete_old(self): os.makedirs(wal_storage_path) self.pghoard.set_state_defaults(self.test_site) - assert self.pghoard.get_remote_basebackups_info(self.test_site) == [] + self.pghoard.remote_basebackup[self.test_site] = self.pghoard.get_remote_basebackups_info(self.test_site) + self.pghoard.remote_xlog[self.test_site] = self.pghoard.get_remote_xlogs_info(self.test_site) + assert self.pghoard.remote_basebackup[self.test_site] == [] + assert self.pghoard.remote_xlog[self.test_site] == [] def write_backup_and_wal_files(what): for bb, wals in what.items(): @@ -191,8 +194,11 @@ def write_backup_and_wal_files(what): "start-time": start_time.isoformat(), }, fp) for wal in wals: - with open(os.path.join(wal_storage_path, wal), "wb") as fp: + wal_path = os.path.join(wal_storage_path, wal) + with open(wal_path, "wb") as fp: fp.write(b"something") + with open(wal_path + ".metadata", "w") as fp: + json.dump({}, fp) backups_and_wals = { "2015-08-25_0": [ @@ -216,12 +222,15 @@ def write_backup_and_wal_files(what): ], } write_backup_and_wal_files(backups_and_wals) - basebackups = self.pghoard.get_remote_basebackups_info(self.test_site) - assert len(basebackups) == 4 + self.pghoard.remote_basebackup[self.test_site] = self.pghoard.get_remote_basebackups_info(self.test_site) + self.pghoard.remote_xlog[self.test_site] = self.pghoard.get_remote_xlogs_info(self.test_site) + assert len(self.pghoard.remote_basebackup[self.test_site]) == 4 + assert len(self.pghoard.remote_xlog[self.test_site]) == 9 self.pghoard.refresh_backup_list_and_delete_old(self.test_site) - basebackups = self.pghoard.get_remote_basebackups_info(self.test_site) - assert len(basebackups) == 1 - assert len(os.listdir(wal_storage_path)) == 3 + self.pghoard.remote_basebackup[self.test_site] = self.pghoard.get_remote_basebackups_info(self.test_site) + assert len(self.pghoard.remote_basebackup[self.test_site]) == 1 + assert len(self.pghoard.remote_xlog[self.test_site]) == 3 + assert len(os.listdir(wal_storage_path)) == 2 * len(self.pghoard.remote_xlog[self.test_site]) # Put all WAL segments between 1 and 9 in place to see that they're deleted and we don't try to go back # any further from TLI 1. Note that timeline 3 is now "empty" so deletion shouldn't touch timelines 2 # or 1. 
@@ -240,14 +249,17 @@ def write_backup_and_wal_files(what): ], } write_backup_and_wal_files(new_backups_and_wals) - assert len(os.listdir(wal_storage_path)) == 11 + self.pghoard.remote_basebackup[self.test_site] = self.pghoard.get_remote_basebackups_info(self.test_site) + self.pghoard.remote_xlog[self.test_site] = self.pghoard.get_remote_xlogs_info(self.test_site) + assert len(self.pghoard.remote_xlog[self.test_site]) == 11 + assert len(os.listdir(wal_storage_path)) == 2 * len(self.pghoard.remote_xlog[self.test_site]) self.pghoard.refresh_backup_list_and_delete_old(self.test_site) - basebackups = self.pghoard.get_remote_basebackups_info(self.test_site) - assert len(basebackups) == 1 + assert len(self.pghoard.remote_basebackup[self.test_site]) == 1 expected_wal_count = len(backups_and_wals["2015-08-25_0"]) expected_wal_count += len(new_backups_and_wals[""]) expected_wal_count += len(new_backups_and_wals["2015-08-25_4"]) - assert len(os.listdir(wal_storage_path)) == expected_wal_count + assert len(self.pghoard.remote_xlog[self.test_site]) == expected_wal_count + assert len(os.listdir(wal_storage_path)) == 2 * len(self.pghoard.remote_xlog[self.test_site]) # Now put WAL files in place with no gaps anywhere gapless_backups_and_wals = { "2015-08-25_3": [ @@ -259,11 +271,14 @@ def write_backup_and_wal_files(what): ], } write_backup_and_wal_files(gapless_backups_and_wals) - assert len(os.listdir(wal_storage_path)) >= 10 + self.pghoard.remote_basebackup[self.test_site] = self.pghoard.get_remote_basebackups_info(self.test_site) + self.pghoard.remote_xlog[self.test_site] = self.pghoard.get_remote_xlogs_info(self.test_site) + assert len(self.pghoard.remote_xlog[self.test_site]) >= 10 + assert len(os.listdir(wal_storage_path)) == 2 * len(self.pghoard.remote_xlog[self.test_site]) self.pghoard.refresh_backup_list_and_delete_old(self.test_site) - basebackups = self.pghoard.get_remote_basebackups_info(self.test_site) - assert len(basebackups) == 1 - assert len(os.listdir(wal_storage_path)) == 1 + assert len(self.pghoard.remote_basebackup[self.test_site]) == 1 + assert len(self.pghoard.remote_xlog[self.test_site]) == 1 + assert len(os.listdir(wal_storage_path)) == 2 * len(self.pghoard.remote_xlog[self.test_site]) def test_alert_files(self): alert_file_path = os.path.join(self.config["alert_file_dir"], "test_alert") From cbdf9c9d2d3e3827e4b1672b469e3662b7daaf6e Mon Sep 17 00:00:00 2001 From: Julien Acroute Date: Wed, 2 Oct 2019 11:37:36 +0200 Subject: [PATCH 14/18] Setup remote xlog and basebackup attributes for tests --- test/conftest.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test/conftest.py b/test/conftest.py index 2c2ae616..c862e3a7 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -274,6 +274,10 @@ def pghoard_base(db, tmpdir, request, compression="snappy", # pylint: disable= pgh = PGHoard(confpath) pgh.test_site = test_site + pgh.remote_xlog = {} + pgh.remote_basebackup = {} + pgh.remote_xlog[pgh.test_site] = [] + pgh.remote_basebackup[pgh.test_site] = [] pgh.start_threads_on_startup() if compression == "snappy": pgh.Compressor = snappy.StreamCompressor From 0defbcec3ee18edc8f9ded58b490d6d9cdd3bf63 Mon Sep 17 00:00:00 2001 From: Julien Acroute Date: Wed, 2 Oct 2019 11:46:32 +0200 Subject: [PATCH 15/18] Fix tests --- test/test_basebackup.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/test/test_basebackup.py b/test/test_basebackup.py index b2f75897..b60a7a60 100644 --- a/test/test_basebackup.py +++ b/test/test_basebackup.py @@ -193,8 +193,6 @@ def 
_test_create_basebackup(self, capsys, db, pghoard, mode, replica=False, acti pghoard.config["backup_sites"][pghoard.test_site]["basebackup_mode"] = mode pghoard.config["backup_sites"][pghoard.test_site]["active_backup_mode"] = active_backup_mode - pghoard.remote_xlog[pghoard.test_site] = [] - pghoard.remote_basebackup[pghoard.test_site] = [] now = datetime.datetime.now(datetime.timezone.utc) metadata = { @@ -536,7 +534,6 @@ def test_handle_site(self, pghoard): site_config = deepcopy(pghoard.config["backup_sites"][pghoard.test_site]) site_config["basebackup_interval_hours"] = 1 / 3600 assert pghoard.basebackups == {} - assert pghoard.test_site not in pghoard.remote_basebackup # initialize with a single backup backup_start = time.monotonic() From c1541b78853c618abe95dccae1a502aa6092138a Mon Sep 17 00:00:00 2001 From: Julien Acroute Date: Wed, 2 Oct 2019 12:10:19 +0200 Subject: [PATCH 16/18] Fix lint --- test/test_transferagent.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/test/test_transferagent.py b/test/test_transferagent.py index d339ac92..c20c834c 100644 --- a/test/test_transferagent.py +++ b/test/test_transferagent.py @@ -26,14 +26,13 @@ class MockStorage(Mock): def init(self): - if self.init_ok is not True: + if self.init_ok is not True: # pylint: disable=access-member-before-definition self.objects = {} now = datetime.datetime.now(dateutil.tz.tzlocal()) self.sample_storage_date = "{} {}".format(now.isoformat().split("+", 1)[0], now.tzname()) self.init_ok = True - def setup_method(self, method): - super().setup_method(method) + def setup_method(self): self.init() def get_contents_to_string(self, key): # pylint: disable=unused-argument @@ -44,9 +43,9 @@ def store_file_from_disk(self, key, local_path, metadata, multipart=None): # py self.init() self.objects[key] = local_path - def iter_key(self, key, *, with_metadata=True, deep=False, include_key=False): + def iter_key(self, key, *, with_metadata=True, deep=False, include_key=False): # pylint: disable=unused-argument self.init() - for item in self.objects.keys(): + for item in self.objects: if key == item[0:len(key)]: yield IterKeyItem( type=KEY_TYPE_OBJECT, From 2296486ff834f6ed8a9980babfff7d7aaf3b7342 Mon Sep 17 00:00:00 2001 From: Julien Acroute Date: Wed, 2 Oct 2019 12:31:24 +0200 Subject: [PATCH 17/18] Fix comments --- test/conftest.py | 2 ++ test/test_basebackup.py | 3 --- test/test_pghoard.py | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/test/conftest.py b/test/conftest.py index c862e3a7..316473ab 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -278,6 +278,8 @@ def pghoard_base(db, tmpdir, request, compression="snappy", # pylint: disable= pgh.remote_basebackup = {} pgh.remote_xlog[pgh.test_site] = [] pgh.remote_basebackup[pgh.test_site] = [] + pgh.set_state_defaults(pgh.test_site) + pgh.state["backup_sites"][pgh.test_site]["basebackups"] = pgh.remote_basebackup[pgh.test_site] pgh.start_threads_on_startup() if compression == "snappy": pgh.Compressor = snappy.StreamCompressor diff --git a/test/test_basebackup.py b/test/test_basebackup.py index b60a7a60..068ef09f 100644 --- a/test/test_basebackup.py +++ b/test/test_basebackup.py @@ -606,9 +606,6 @@ def test_get_new_backup_details(self, pghoard): site = pghoard.test_site pghoard.set_state_defaults(site) - pghoard.remote_xlog[site] = pghoard.get_remote_xlogs_info(site) - pghoard.remote_basebackup[site] = pghoard.get_remote_basebackups_info(site) - pghoard.state["backup_sites"][site]["basebackups"] = 
pghoard.remote_basebackup[site] site_config = pghoard.config["backup_sites"][site] # No backups, one should be created. No backup schedule defined so normalized backup time is None diff --git a/test/test_pghoard.py b/test/test_pghoard.py index a75e42af..4611b167 100644 --- a/test/test_pghoard.py +++ b/test/test_pghoard.py @@ -147,7 +147,7 @@ def test_determine_backups_to_delete(self): site_config["basebackup_age_days_max"] = 10 to_delete = self.pghoard.determine_backups_to_delete(self.test_site) # basebackup_count trumps backup age and backups are removed even though they're not too old - # We have 13 basebackups, 12 with start-time < 8 days + # We have 13 basebackups, 12 with start-time < 10 days # So based with basebackup_count = 9 pghoard should delete 4 backups (bb1, bb2, bb3, bb4) # And with basebackup_age_days_max = 10 days pghoard should delete 1 backup (bb1) assert len(to_delete) == 4 From 9239570ff624cd2916c8e897dadbdc2c726e72a1 Mon Sep 17 00:00:00 2001 From: Julien Acroute Date: Wed, 2 Oct 2019 14:15:45 +0200 Subject: [PATCH 18/18] Fix lint error in tests --- test/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/base.py b/test/base.py index bd66decd..453bfb22 100644 --- a/test/base.py +++ b/test/base.py @@ -101,7 +101,7 @@ def setup_method(self, method): self.remote_xlog[self.test_site] = [] self.remote_basebackup[self.test_site] = [] - def teardown_method(self, method): + def teardown_method(self, method): # pylint: disable=unused-argument rmtree(self.temp_dir) def patch_basebackup_info(self, *, entry, site_config): # pylint: disable=unused-argument
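
Taken together, these patches replace repeated object-storage listings with in-memory mirrors of the remote state: remote_xlog and remote_basebackup are loaded once per site in handle_site(), appended to by TransferAgent.handle_upload() after each successful upload, and trimmed by delete_remote_basebackup() and delete_remote_wal_before() when files are pruned. The sketch below illustrates that bookkeeping pattern in a minimal, self-contained form; InMemoryStorage and WalTracker are illustrative stand-ins, not pghoard's actual classes, and the "<site>/xlog/<name>" key layout is a simplifying assumption.

    class InMemoryStorage:
        """Stand-in for an object store; just enough API for the sketch."""

        def __init__(self):
            self.objects = {}

        def store_file(self, key, data):
            self.objects[key] = data

        def delete_key(self, key):
            del self.objects[key]

        def list_path(self, prefix):
            # Return the full keys under the given prefix, like a storage listing
            return [key for key in self.objects if key.startswith(prefix)]


    class WalTracker:
        """Keeps an in-memory mirror of the remote xlog listing, one list per site."""

        def __init__(self, storage):
            self.storage = storage
            self.remote_xlog = {}  # site -> list of uploaded WAL file names

        def ensure_site(self, site):
            # Lazy one-time scan per site, mirroring what handle_site() does
            if site not in self.remote_xlog:
                keys = self.storage.list_path("{}/xlog".format(site))
                self.remote_xlog[site] = [key.rsplit("/", 1)[-1] for key in keys]

        def record_upload(self, site, wal_name, data=b""):
            # After a successful transfer, append to the mirror instead of re-listing
            self.ensure_site(site)
            self.storage.store_file("{}/xlog/{}".format(site, wal_name), data)
            self.remote_xlog[site].append(wal_name)

        def delete_wal(self, site, wal_name):
            # Every deletion also updates the mirror so it stays trustworthy
            self.storage.delete_key("{}/xlog/{}".format(site, wal_name))
            self.remote_xlog[site].remove(wal_name)


    tracker = WalTracker(InMemoryStorage())
    tracker.record_upload("site1", "000000010000000000000001")
    tracker.record_upload("site1", "000000010000000000000002")
    tracker.delete_wal("site1", "000000010000000000000001")
    assert tracker.remote_xlog["site1"] == ["000000010000000000000002"]

The invariant that makes this safe is that every mutation of the remote store goes through a method that also updates the mirror, so between full scans the mirror can stand in for a listing.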