Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[TEST] Rotate pgbackrest logs patroni timeout #654

Draft
wants to merge 7 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions src/backups.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
PGBACKREST_CONF_PATH,
PGBACKREST_CONFIGURATION_FILE,
PGBACKREST_EXECUTABLE,
PGBACKREST_LOGROTATE_FILE,
PGBACKREST_LOGS_PATH,
POSTGRESQL_DATA_PATH,
)
Expand Down Expand Up @@ -1153,6 +1154,13 @@ def _render_pgbackrest_conf_file(self) -> bool:
# Render pgBackRest config file.
self.charm._patroni.render_file(f"{PGBACKREST_CONF_PATH}/pgbackrest.conf", rendered, 0o644)

# Render the logrotate configuration file.
with open("templates/pgbackrest.logrotate.j2") as file:
template = Template(file.read())
self.charm._patroni.render_file(
PGBACKREST_LOGROTATE_FILE, template.render(), 0o644, change_owner=False
)

return True

def _restart_database(self) -> None:
Expand Down
3 changes: 3 additions & 0 deletions src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@
)
from relations.db import EXTENSIONS_BLOCKING_MESSAGE, DbProvides
from relations.postgresql_provider import PostgreSQLProvider
from rotate_logs import RotateLogs
from upgrade import PostgreSQLUpgrade, get_postgresql_dependencies_model
from utils import new_password

Expand Down Expand Up @@ -170,6 +171,7 @@ def __init__(self, *args):
juju_version = JujuVersion.from_environ()
run_cmd = "/usr/bin/juju-exec" if juju_version.major > 2 else "/usr/bin/juju-run"
self._observer = ClusterTopologyObserver(self, run_cmd)
self._rotate_logs = RotateLogs(self)
self.framework.observe(self.on.cluster_topology_change, self._on_cluster_topology_change)
self.framework.observe(self.on.install, self._on_install)
self.framework.observe(self.on.leader_elected, self._on_leader_elected)
Expand Down Expand Up @@ -203,6 +205,7 @@ def __init__(self, *args):
charm=self, relation="restart", callback=self._restart
)
self._observer.start_observer()
self._rotate_logs.start_log_rotation()
self._grafana_agent = COSAgentProvider(
self,
metrics_endpoints=[{"path": "/metrics", "port": METRICS_PORT}],
Expand Down
9 changes: 6 additions & 3 deletions src/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@

RUNNING_STATES = ["running", "streaming"]

PATRONI_TIMEOUT = 10
PATRONI_TIMEOUT = 60


class ClusterNotPromotedError(Exception):
Expand Down Expand Up @@ -546,14 +546,16 @@ def promote_standby_cluster(self) -> None:
if self.get_primary() is None:
raise ClusterNotPromotedError("cluster not promoted")

def render_file(self, path: str, content: str, mode: int) -> None:
def render_file(self, path: str, content: str, mode: int, change_owner: bool = True) -> None:
"""Write a content rendered from a template to a file.

Args:
path: the path to the file.
content: the data to be written to the file.
mode: access permission mask applied to the
file using chmod (e.g. 0o640).
change_owner: whether to change the file owner
to the snap_daemon user.
"""
# TODO: keep this method to use it also for generating replication configuration files and
# move it to an utils / helpers file.
Expand All @@ -562,7 +564,8 @@ def render_file(self, path: str, content: str, mode: int) -> None:
file.write(content)
# Ensure correct permissions are set on the file.
os.chmod(path, mode)
self._change_owner(path)
if change_owner:
self._change_owner(path)

def render_patroni_yml_file(
self,
Expand Down
2 changes: 2 additions & 0 deletions src/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,3 +84,5 @@
PLUGIN_OVERRIDES = {"audit": "pgaudit", "uuid_ossp": '"uuid-ossp"'}

SPI_MODULE = ["refint", "autoinc", "insert_username", "moddatetime"]

PGBACKREST_LOGROTATE_FILE = "/etc/logrotate.d/pgbackrest.logrotate"
72 changes: 72 additions & 0 deletions src/rotate_logs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
# Copyright 2024 Canonical Ltd.
# See LICENSE file for licensing details.

"""Background process for rotating logs."""

import logging
import os
import subprocess
from time import sleep

from ops.charm import CharmBase
from ops.framework import Object
from ops.model import ActiveStatus

from constants import PGBACKREST_LOGROTATE_FILE

logger = logging.getLogger(__name__)

# File path for the spawned rotate logs process to write logs.
LOG_FILE_PATH = "/var/log/rotate_logs.log"


class RotateLogs(Object):
"""Rotate logs every minute."""

def __init__(self, charm: CharmBase):
super().__init__(charm, "rotate-logs")
self._charm = charm

def start_log_rotation(self):
"""Start the rotate logs running in a new process."""
if (
not isinstance(self._charm.unit.status, ActiveStatus)
or self._charm._peers is None
or not os.path.exists(PGBACKREST_LOGROTATE_FILE)
):
return
if "rotate-logs-pid" in self._charm.unit_peer_data:
# Double check that the PID exists.
pid = int(self._charm.unit_peer_data["rotate-logs-pid"])
try:
os.kill(pid, 0)
return
except OSError:
pass

logging.info("Starting rotate logs process")

# Input is generated by the charm
pid = subprocess.Popen( # noqa: S603
["/usr/bin/python3", "src/rotate_logs.py"],
# File should not close
stdout=open(LOG_FILE_PATH, "a"), # noqa: SIM115
stderr=subprocess.STDOUT,
).pid

self._charm.unit_peer_data.update({"rotate-logs-pid": f"{pid}"})
logging.info(f"Started rotate logs process with PID {pid}")


def main():
"""Main loop that calls logrotate."""
while True:
# Input is constant
subprocess.run(["/usr/sbin/logrotate", "-f", PGBACKREST_LOGROTATE_FILE]) # noqa: S603

# Wait 60 seconds before executing logrotate again.
sleep(60)


if __name__ == "__main__":
main()
10 changes: 10 additions & 0 deletions templates/pgbackrest.logrotate.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
/var/snap/charmed-postgresql/common/var/log/pgbackrest/*.log {
rotate 10
missingok
notifempty
nocompress
daily
create 0600 snap_daemon snap_daemon
dateext
dateformat -%Y%m%d_%H:%M.log
}
13 changes: 12 additions & 1 deletion tests/unit/test_backups.py
Original file line number Diff line number Diff line change
Expand Up @@ -1691,13 +1691,24 @@ def test_render_pgbackrest_conf_file(harness, tls_ca_chain_filename):
# Check the template is opened read-only in the call to open.
assert mock.call_args_list[0][0] == ("templates/pgbackrest.conf.j2",)

# Get the expected content from a file.
with open("templates/pgbackrest.conf.j2") as file:
template = Template(file.read())
log_rotation_expected_content = template.render()

# Ensure the correct rendered template is sent to _render_file method.
calls = [
call(
"/var/snap/charmed-postgresql/current/etc/pgbackrest/pgbackrest.conf",
expected_content,
0o644,
)
),
call(
"/etc/logrotate.d/pgbackrest.logrotate",
log_rotation_expected_content,
0o644,
change_owner=False,
),
]
if tls_ca_chain_filename != "":
calls.insert(0, call(tls_ca_chain_filename, "fake-tls-ca-chain", 0o644))
Expand Down
11 changes: 11 additions & 0 deletions tests/unit/test_cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,17 @@ def test_render_file(peers_ips, patroni):
# Ensure the file is chown'd correctly.
_chown.assert_called_with(filename, uid=35, gid=35)

# Test when it's requested to not change the file owner.
mock.reset_mock()
_pwnam.reset_mock()
_chmod.reset_mock()
_chown.reset_mock()
with patch("builtins.open", mock, create=True):
patroni.render_file(filename, "rendered-content", 0o640, change_owner=False)
_pwnam.assert_not_called()
_chmod.assert_called_once_with(filename, 0o640)
_chown.assert_not_called()


def test_render_patroni_yml_file(peers_ips, patroni):
with (
Expand Down
98 changes: 98 additions & 0 deletions tests/unit/test_rotate_logs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
# Copyright 2024 Canonical Ltd.
# See LICENSE file for licensing details.
from unittest.mock import Mock, PropertyMock, patch

import pytest
from ops.charm import CharmBase
from ops.model import ActiveStatus, Relation, WaitingStatus
from ops.testing import Harness

from rotate_logs import RotateLogs


class MockCharm(CharmBase):
def __init__(self, *args):
super().__init__(*args)

self.rotate_logs = RotateLogs(self)

@property
def _peers(self) -> Relation | None:
return None

@property
def unit_peer_data(self) -> dict:
"""Unit peer relation data object."""
if self._peers is None:
return {}

return self._peers.data[self.unit]


@pytest.fixture(autouse=True)
def harness():
harness = Harness(MockCharm, meta="name: test-charm")
harness.begin()
yield harness
harness.cleanup()


def test_start_log_rotation(harness):
with (
patch("builtins.open") as _open,
patch("subprocess.Popen") as _popen,
patch("os.path.exists") as _exists,
patch.object(MockCharm, "_peers", new_callable=PropertyMock) as _peers,
):
# Test that nothing is done if there is already a running process.
_peers.return_value = Mock(data={harness.charm.unit: {"rotate-logs-pid": "1"}})
_exists.return_value = True
harness.charm.rotate_logs.start_log_rotation()
_popen.assert_not_called()

# Test that nothing is done if the charm is not in an active status.
harness.charm.unit.status = WaitingStatus()
_peers.return_value = Mock(data={harness.charm.unit: {}})
harness.charm.rotate_logs.start_log_rotation()
_popen.assert_not_called()

# Test that nothing is done if peer relation is not available yet.
harness.charm.unit.status = ActiveStatus()
_peers.return_value = None
harness.charm.rotate_logs.start_log_rotation()
_popen.assert_not_called()

# Test that nothing is done if the logrotate file does not exist.
_peers.return_value = Mock(data={harness.charm.unit: {}})
_exists.return_value = False
harness.charm.rotate_logs.start_log_rotation()
_popen.assert_not_called()

# Test that nothing is done if there is already a running process.
_popen.return_value = Mock(pid=1)
_exists.return_value = True
harness.charm.rotate_logs.start_log_rotation()
_popen.assert_called_once()


def test_start_log_rotation_already_running(harness):
with (
patch("builtins.open") as _open,
patch("subprocess.Popen") as _popen,
patch("os.kill") as _kill,
patch("os.path.exists") as _exists,
patch.object(MockCharm, "_peers", new_callable=PropertyMock) as _peers,
):
harness.charm.unit.status = ActiveStatus()
_peers.return_value = Mock(data={harness.charm.unit: {"rotate-logs-pid": "1234"}})
_exists.return_value = True
harness.charm.rotate_logs.start_log_rotation()
_kill.assert_called_once_with(1234, 0)
assert not _popen.called
_kill.reset_mock()

# If process is already dead, it should restart.
_kill.side_effect = OSError
harness.charm.rotate_logs.start_log_rotation()
_kill.assert_called_once_with(1234, 0)
_popen.assert_called_once()