diff --git a/src/charm.py b/src/charm.py index 6a5e448a6d..7989c5ab66 100755 --- a/src/charm.py +++ b/src/charm.py @@ -9,6 +9,7 @@ import logging import os import re +import shutil import sys from pathlib import Path from typing import Dict, List, Literal, Optional, Tuple, get_args @@ -124,6 +125,7 @@ EXTENSIONS_DEPENDENCY_MESSAGE = "Unsatisfied plugin dependencies. Please check the logs" EXTENSION_OBJECT_MESSAGE = "Cannot disable plugins: Existing objects depend on it. See logs" +INSUFFICIENT_SIZE_WARNING = "<10% free space on pgdata volume." ORIGINAL_PATRONI_ON_FAILURE_CONDITION = "restart" @@ -205,6 +207,7 @@ def __init__(self, *args): self.framework.observe(self.on.get_primary_action, self._on_get_primary) self.framework.observe(self.on.update_status, self._on_update_status) self._storage_path = self.meta.storages["pgdata"].location + self.pgdata_path = f"{self._storage_path}/pgdata" self.upgrade = PostgreSQLUpgrade( self, @@ -861,10 +864,9 @@ def fix_leader_annotation(self) -> bool: def _create_pgdata(self, container: Container): """Create the PostgreSQL data directory.""" - path = f"{self._storage_path}/pgdata" - if not container.exists(path): + if not container.exists(self.pgdata_path): container.make_dir( - path, permissions=0o770, user=WORKLOAD_OS_USER, group=WORKLOAD_OS_GROUP + self.pgdata_path, permissions=0o770, user=WORKLOAD_OS_USER, group=WORKLOAD_OS_GROUP ) # Also, fix the permissions from the parent directory. container.exec([ @@ -940,6 +942,9 @@ def _on_postgresql_pebble_ready(self, event: WorkloadEvent) -> None: self._set_active_status() def _set_active_status(self): + # The charm should not override this status outside of the function checking disk space. 
+        if self.unit.status.message == INSUFFICIENT_SIZE_WARNING:
+            return
         if "require-change-bucket-after-restore" in self.app_peer_data:
             if self.unit.is_leader():
                 self.app_peer_data.update({
@@ -1328,12 +1333,50 @@ def _on_update_status_early_exit_checks(self, container) -> bool:
             return False
         return True
 
+    def _check_pgdata_storage_size(self) -> None:
+        """Check free space on the pgdata volume and update the unit status.
+
+        Blocks the unit when less than 10% of the volume is free and clears that
+        status once space has been released. The check is skipped, with an error
+        log, when the disk usage cannot be determined.
+        """
+        try:
+            total_size, _, free_size = shutil.disk_usage(self.pgdata_path)
+        except FileNotFoundError:
+            logger.error("pgdata folder not found in %s", self.pgdata_path)
+            return
+        except OSError as e:
+            logger.error("Unable to read disk usage of %s: %s", self.pgdata_path, e)
+            return
+
+        if total_size == 0:
+            # A volume reporting no capacity would make the ratio below divide by zero.
+            logger.error("pgdata volume in %s reports a total size of 0", self.pgdata_path)
+            return
+
+        free_ratio = free_size / total_size
+        logger.debug(
+            "pgdata free disk space: %s out of %s, ratio of %s",
+            free_size,
+            total_size,
+            free_ratio,
+        )
+        if free_ratio < 0.1:
+            self.unit.status = BlockedStatus(INSUFFICIENT_SIZE_WARNING)
+        elif self.unit.status.message == INSUFFICIENT_SIZE_WARNING:
+            # _set_active_status() returns early while this warning is set, so reset
+            # the status first and then let it compute the regular one.
+            self.unit.status = ActiveStatus()
+            self._set_active_status()
+
     def _on_update_status(self, _) -> None:
         """Update the unit status message."""
         container = self.unit.get_container("postgresql")
         if not self._on_update_status_early_exit_checks(container):
             return
 
+        self._check_pgdata_storage_size()
+
         if self._has_blocked_status or self._has_non_restore_waiting_status:
             # If charm was failing to disable plugin, try again (user may have removed the objects)
             if self.unit.status.message == EXTENSION_OBJECT_MESSAGE:
diff --git a/tests/integration/test_storage.py b/tests/integration/test_storage.py
new file mode 100644
index 0000000000..fe936685aa
--- /dev/null
+++ b/tests/integration/test_storage.py
@@ -0,0 +1,55 @@
+#!/usr/bin/env python3
+# Copyright 2024 Canonical Ltd.
+# See LICENSE file for licensing details.
+
+import logging
+
+import pytest
+from pytest_operator.plugin import OpsTest
+
+from .helpers import (
+    DATABASE_APP_NAME,
+    STORAGE_PATH,
+    build_and_deploy,
+    get_primary,
+    run_command_on_unit,
+)
+
+logger = logging.getLogger(__name__)
+
+MAX_RETRIES = 20
+INSUFFICIENT_SIZE_WARNING = "<10% free space on pgdata volume."
+
+
+@pytest.mark.group(1)
+@pytest.mark.abort_on_fail
+async def test_filling_and_emptying_pgdata_storage(ops_test: OpsTest):
+    """Fill the pgdata volume until the unit blocks, then free it and expect recovery."""
+
+    def low_space_reported() -> bool:
+        """Tell whether any unit is blocked with the low disk space message."""
+        units = ops_test.model.applications[DATABASE_APP_NAME].units
+        return any(
+            u.workload_status == "blocked"
+            and u.workload_status_message == INSUFFICIENT_SIZE_WARNING
+            for u in units
+        )
+
+    # Deploy a single PostgreSQL unit built from the local charm.
+    async with ops_test.fast_forward():
+        await build_and_deploy(ops_test, 1)
+
+    # Write random data sized at about 91% of the available space into the pgdata folder.
+    primary = await get_primary(ops_test, DATABASE_APP_NAME)
+    fill_command = f"FREE_SPACE=$(df --output=avail {STORAGE_PATH}/pgdata | tail -1) && dd if=/dev/urandom of={STORAGE_PATH}/pgdata/tmp bs=1M count=$(( (FREE_SPACE * 91 / 100) / 1024 ))"
+    await run_command_on_unit(ops_test, primary, fill_command)
+
+    # The charm must notice the shortage and block the unit.
+    async with ops_test.fast_forward():
+        await ops_test.model.block_until(low_space_reported, timeout=500)
+
+    # Remove the filler file so the volume has free space again.
+    await run_command_on_unit(ops_test, primary, f"rm {STORAGE_PATH}/pgdata/tmp")
+
+    # Once space is back the unit is expected to return to active.
+    await ops_test.model.wait_for_idle(apps=[DATABASE_APP_NAME], status="active", timeout=1000)