Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DPE-2290] Upgrade integration tests #191

Closed
wants to merge 21 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
83793ab
Added initial upgrade implementation
marceloneppel Jul 17, 2023
dfdaf1f
Merge remote-tracking branch 'origin/main' into dpe-1768-minor-versio…
marceloneppel Jul 19, 2023
5f3fa35
Updated the code with the new library
marceloneppel Jul 21, 2023
aa2370c
Merge remote-tracking branch 'origin/main' into dpe-1768-minor-versio…
marceloneppel Jul 25, 2023
c06199f
Improved code and added unit tests
marceloneppel Jul 25, 2023
0f2f942
Added one more check in unit test
marceloneppel Jul 25, 2023
339830b
Removed upgrade integration tests
marceloneppel Jul 25, 2023
fc35a4b
Revert "Removed upgrade integration tests"
marceloneppel Jul 25, 2023
6ad0562
Added replication health check and snap dependency
marceloneppel Jul 28, 2023
735b654
Added replication health check and snap dependency
marceloneppel Aug 2, 2023
a7ba226
Merge remote-tracking branch 'origin/main' into dpe-1768-minor-versio…
marceloneppel Aug 2, 2023
9e97a49
Remove dependencies version hashes
marceloneppel Aug 2, 2023
cabe3f7
Merge remote-tracking branch 'origin/dpe-1768-minor-version-upgrade' …
marceloneppel Aug 4, 2023
5322147
Merge remote-tracking branch 'origin/main' into dpe-2290-upgrade-inte…
marceloneppel Aug 14, 2023
47b4d73
Merge remote-tracking branch 'origin/main' into dpe-2290-upgrade-inte…
marceloneppel Aug 16, 2023
6103cea
Add logic to update dependencies file
marceloneppel Aug 16, 2023
4719c1f
Add extra tests
marceloneppel Aug 21, 2023
ea75d93
Merge remote-tracking branch 'origin/main' into dpe-2290-upgrade-inte…
marceloneppel Sep 20, 2023
fad15ce
Merge remote-tracking branch 'origin/main' into dpe-2290-upgrade-inte…
marceloneppel Sep 27, 2023
8b60c29
Implement upgrade from stable logic
marceloneppel Sep 28, 2023
539ea20
Fix single unit cluster upgrade
marceloneppel Sep 29, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ jobs:
- password-rotation-integration
- plugins-integration
- tls-integration
- upgrade-integration
agent-versions:
- "2.9.44" # renovate: latest juju 2
- "3.1.5" # renovate: latest juju 3
Expand Down
18 changes: 18 additions & 0 deletions src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,14 @@ def __init__(self, *args):
log_slots=[f"{POSTGRESQL_SNAP_NAME}:logs"],
)

@property
def app_units(self) -> set[Unit]:
    """Units of this application that are known through the peer relation.

    Returns:
        An empty set while ``self._peers`` is falsy; otherwise a set holding
        this unit plus every unit listed in ``self._peers.units``.
    """
    peers = self._peers
    if peers:
        return {self.unit} | set(peers.units)
    return set()

@property
def app_peer_data(self) -> Dict:
"""Application peer relation data object."""
Expand Down Expand Up @@ -935,6 +943,12 @@ def _can_start(self, event: StartEvent) -> bool:
self._reboot_on_detached_storage(event)
return False

# Safeguard against starting while upgrading.
if not self.upgrade.idle:
logger.debug("Defer on_start: Cluster is upgrading")
event.defer()
return False

# Doesn't try to bootstrap the cluster if it's in a blocked state
# caused, for example, because a failed installation of packages.
if self.is_blocked:
Expand Down Expand Up @@ -981,6 +995,10 @@ def _setup_exporter(self) -> None:
cache = snap.SnapCache()
postgres_snap = cache[POSTGRESQL_SNAP_NAME]

if postgres_snap.revision != list(filter(lambda snap_package: snap_package[0] == POSTGRESQL_SNAP_NAME, SNAP_PACKAGES))[0][1]["revision"]:
logger.debug("Early exit _setup_exporter: snap was not refreshed to the right version yet")
return

postgres_snap.set(
{
"exporter.user": MONITORING_USER,
Expand Down
71 changes: 69 additions & 2 deletions src/upgrade.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,13 @@
DependencyModel,
UpgradeGrantedEvent,
)
from ops.model import ActiveStatus, MaintenanceStatus, WaitingStatus
from ops.model import ActiveStatus, MaintenanceStatus, RelationDataContent, WaitingStatus
from pydantic import BaseModel
from tenacity import RetryError, Retrying, stop_after_attempt, wait_fixed
from typing_extensions import override

from constants import SNAP_PACKAGES
from constants import APP_SCOPE, MONITORING_PASSWORD_KEY, MONITORING_USER, SNAP_PACKAGES
from utils import new_password

logger = logging.getLogger(__name__)

Expand All @@ -26,6 +27,7 @@ class PostgreSQLDependencyModel(BaseModel):
"""PostgreSQL dependencies model."""

charm: DependencyModel
snap: DependencyModel


def get_postgresql_dependencies_model() -> PostgreSQLDependencyModel:
Expand All @@ -42,6 +44,9 @@ def __init__(self, charm, model: BaseModel, **kwargs) -> None:
"""Initialize the class."""
super().__init__(charm, model, **kwargs)
self.charm = charm
# self.framework.observe(self.charm.on.upgrade_charm, self._on_upgrade_charm_check_legacy)
logger.error("running")
self._on_upgrade_charm_check_legacy(None)

@override
def build_upgrade_stack(self) -> List[int]:
Expand Down Expand Up @@ -77,6 +82,28 @@ def log_rollback_instructions(self) -> None:
"Run `juju refresh --revision <previous-revision> postgresql` to initiate the rollback"
)

def _on_upgrade_charm_check_legacy(self, event) -> None:
    """Bootstrap the upgrade data when no upgrade state has been set yet.

    The method returns without doing anything while the peer relation is
    missing, while it holds fewer units than ``self.charm.app_units``, or
    when ``self.state`` is already set. Non-leader units only flag themselves
    as ``"ready"``. The leader flags itself as ready, calls
    ``_prepare_upgrade_from_legacy`` and emits ``upgrade_charm`` on ``self.on``
    once the non-empty entries of ``self.unit_states`` are as many as the
    peer units and all of them are ``"ready"``.

    Args:
        event: not used by this method.
    """
    everyone_joined = bool(self.peer_relation) and len(self.app_units) >= len(
        self.charm.app_units
    )
    if not everyone_joined:
        logger.debug("Wait all units join the upgrade relation")
        return

    # A state that is already set means the upgrade is supported: nothing to do.
    if self.state:
        return

    ready_flag = {"state": "ready"}

    if not self.charm.unit.is_leader():
        # Non-leader units just report that they are ready.
        self.unit_upgrade_data.update(ready_flag)
        return

    reported_states = [unit_state for unit_state in self.unit_states if unit_state != ""]

    # Every peer unit must have reported a state before the leader moves on.
    if len(reported_states) != len(self.peer_relation.units):
        return
    # Given the length check above, an empty list means the relation has no
    # peer units; otherwise every reported state has to be "ready".
    if reported_states and set(reported_states) != {"ready"}:
        return

    self.unit_upgrade_data.update(ready_flag)
    self._prepare_upgrade_from_legacy()
    getattr(self.on, "upgrade_charm").emit()

@override
def _on_upgrade_granted(self, event: UpgradeGrantedEvent) -> None:
# Refresh the charmed PostgreSQL snap and restart the database.
Expand All @@ -91,6 +118,14 @@ def _on_upgrade_granted(self, event: UpgradeGrantedEvent) -> None:
self.charm._setup_exporter()
self.charm.backup.start_stop_pgbackrest_service()

try:
self.charm.unit.set_workload_version(
self.charm._patroni.get_postgresql_version() or "unset"
)
except TypeError:
# Don't fail on this, just log it.
logger.warning("Failed to get PostgreSQL version")

# Wait until the database initialise.
self.charm.unit.status = WaitingStatus("waiting for database initialisation")
try:
Expand Down Expand Up @@ -144,3 +179,35 @@ def pre_upgrade_check(self) -> None:
"a backup is being created",
"wait for the backup creation to finish before starting the upgrade",
)

def _prepare_upgrade_from_legacy(self) -> None:
    """Set up what an upgrade needs when the previous charm had no upgrade support.

    Stores the upgrade stack, writes the serialized dependency model under the
    ``dependencies`` key of the application data bag in the peer relation,
    generates the monitoring password when the secret is missing and creates
    the monitoring user when it is not listed among the database users.

    Assumes it runs on the leader unit only.
    """
    logger.warning("Upgrading from unsupported version")

    # Fill the application upgrade data so the upgrade procedure can proceed.
    logger.debug("Building upgrade stack")
    upgrade_stack = self.build_upgrade_stack()
    logger.debug(f"Upgrade stack: {upgrade_stack}")
    self.upgrade_stack = upgrade_stack

    logger.debug("Persisting dependencies to upgrade relation data...")
    app_databag = self.peer_relation.data[self.charm.app]
    app_databag.update({"dependencies": json.dumps(self.dependency_model.dict())})

    charm = self.charm
    if charm.get_secret(APP_SCOPE, MONITORING_PASSWORD_KEY) is None:
        charm.set_secret(APP_SCOPE, MONITORING_PASSWORD_KEY, new_password())

    if MONITORING_USER in charm.postgresql.list_users():
        return

    # The monitoring user is missing: create it with the stored password.
    charm.postgresql.create_user(
        MONITORING_USER,
        charm.get_secret(APP_SCOPE, MONITORING_PASSWORD_KEY),
        extra_user_roles="pg_monitor",
    )

@property
def unit_upgrade_data(self) -> RelationDataContent:
    """Return the data bag that the peer relation holds for this charm's own unit."""
    relation_data = self.peer_relation.data
    return relation_data[self.charm.unit]
168 changes: 168 additions & 0 deletions tests/integration/ha_tests/test_upgrade.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
#!/usr/bin/env python3
# Copyright 2023 Canonical Ltd.
# See LICENSE file for licensing details.
import json
import logging
import subprocess
import zipfile

import pytest as pytest
from pytest_operator.plugin import OpsTest

from tests.integration.ha_tests.conftest import APPLICATION_NAME
from tests.integration.ha_tests.helpers import (
app_name,
are_writes_increasing,
check_writes,
start_continuous_writes,
)
from tests.integration.helpers import get_primary

logger = logging.getLogger(__name__)


@pytest.mark.abort_on_fail
async def test_build_and_deploy(ops_test: OpsTest) -> None:
    """Build and deploy three units of PostgreSQL and the continuous writes application.

    Each application is deployed only when ``app_name`` does not find it in
    the model; the wait for an active model happens only if something was
    deployed by this test.
    """
    deployed_something = False

    # Deploy the database unless there is a pre-existing cluster.
    existing_cluster = await app_name(ops_test)
    if not existing_cluster:
        deployed_something = True
        database_charm = await ops_test.build_charm(".")
        async with ops_test.fast_forward():
            await ops_test.model.deploy(database_charm, num_units=3)

    # Deploy the continuous writes application charm unless it is already there.
    existing_writer = await app_name(ops_test, APPLICATION_NAME)
    if not existing_writer:
        deployed_something = True
        async with ops_test.fast_forward():
            writer_charm = await ops_test.build_charm(
                "tests/integration/ha_tests/application-charm"
            )
            await ops_test.model.deploy(writer_charm, application_name=APPLICATION_NAME)

    if not deployed_something:
        return

    async with ops_test.fast_forward():
        await ops_test.model.wait_for_idle(status="active", timeout=1000)


async def test_successful_upgrade(ops_test: OpsTest, continuous_writes) -> None:
    """Refresh the charm while continuous writes are running and check nothing is lost.

    Steps: start continuous writes, confirm they are increasing on the primary,
    run the ``pre-upgrade-check`` action on the leader, refresh the application
    with a locally built charm, wait for it to become active, then confirm that
    writes are still increasing and that ``check_writes`` passes.
    """
    # Start an application that continuously writes data to the database.
    logger.info("starting continuous writes to the database")
    app = await app_name(ops_test)
    await start_continuous_writes(ops_test, app)

    # Check whether writes are increasing.
    logger.info("checking whether writes are increasing")
    any_unit_name = next(iter(ops_test.model.applications[app].units)).name
    primary_name = await get_primary(ops_test, any_unit_name)
    await are_writes_increasing(ops_test, primary_name)

    # Run the pre-upgrade-check action.
    logger.info("running pre-upgrade check")
    leader_unit_name = None
    for unit in ops_test.model.applications[app].units:
        if await unit.is_leader_from_status():
            leader_unit_name = unit.name
            break
    # Fail with a clear message rather than an AttributeError on None below.
    assert leader_unit_name is not None, f"no leader unit found for application {app}"
    action = await ops_test.model.units.get(leader_unit_name).run_action("pre-upgrade-check")
    await action.wait()
    assert action.results["Code"] == "0"

    # Run juju refresh.
    logger.info("refreshing the charm")
    application = ops_test.model.applications[app]
    charm = await ops_test.build_charm(".")
    await application.refresh(path=charm)
    async with ops_test.fast_forward(fast_interval="30s"):
        await ops_test.model.wait_for_idle(
            apps=[app], status="active", idle_period=15, raise_on_blocked=True
        )

    # Check whether writes are increasing.
    logger.info("checking whether writes are increasing")
    primary_name = await get_primary(ops_test, any_unit_name)
    await are_writes_increasing(ops_test, primary_name)

    # Verify that no writes to the database were missed after stopping the writes
    # (check that all the units have all the writes).
    logger.info("checking whether no writes were lost")
    await check_writes(ops_test)


async def test_failed_upgrade(ops_test: OpsTest) -> None:
    """Refresh to a charm with altered snap dependency data and expect a blocked app.

    After a successful ``pre-upgrade-check``, the locally built charm is copied
    into ``<charm>.modified`` with the ``snap`` entry of ``src/dependency.json``
    rewritten (``upgrade_supported`` set to ``^15`` and ``version`` to ``15.1``).
    The application is refreshed to that copy through the ``juju`` CLI and is
    expected to settle in ``blocked`` status.

    Raises:
        Exception: if the ``juju refresh`` command exits with a non-zero code.
    """
    # Run the pre-upgrade-check action.
    logger.info("running pre-upgrade check")
    app = await app_name(ops_test)
    leader_unit_name = None
    for unit in ops_test.model.applications[app].units:
        if await unit.is_leader_from_status():
            leader_unit_name = unit.name
            break
    # Fail with a clear message rather than an AttributeError on None below.
    assert leader_unit_name is not None, f"no leader unit found for application {app}"
    action = await ops_test.model.units.get(leader_unit_name).run_action("pre-upgrade-check")
    await action.wait()
    assert action.results["Code"] == "0"

    # Run juju refresh.
    logger.info("refreshing the charm")
    charm = await ops_test.build_charm(".")
    modified_charm = f"{charm}.modified"
    logger.info("charm: %s", charm)
    logger.info("modified_charm: %s", modified_charm)
    with zipfile.ZipFile(charm, "r") as charm_file, zipfile.ZipFile(
        modified_charm, "w"
    ) as modified_charm_file:
        # Unix permission bits of every source entry, keyed by file name.
        unix_attributes = {}
        for charm_info in charm_file.infolist():
            logger.debug("charm_info.filename: %s", charm_info.filename)
            with charm_file.open(charm_info) as file:
                if charm_info.filename == "src/dependency.json":
                    # Rewrite the snap dependency constraints.
                    dependencies = json.loads(file.read())
                    dependencies["snap"]["upgrade_supported"] = "^15"
                    dependencies["snap"]["version"] = "15.1"
                    content = json.dumps(dependencies)
                else:
                    # Any other file is copied unchanged.
                    content = file.read()
            modified_charm_file.writestr(charm_info.filename, content)
            unix_attributes[charm_info.filename] = charm_info.external_attr >> 16

        # Entries written by name do not inherit the source permissions, so
        # they are restored here, while the new archive is still open.
        for modified_charm_info in modified_charm_file.infolist():
            modified_charm_info.external_attr = unix_attributes[modified_charm_info.filename] << 16

    # Pass an explicit argument list: splitting an interpolated string would
    # break on a model name or charm path that contains whitespace.
    refresh_command = [
        "juju",
        "refresh",
        "--model",
        ops_test.model.info.name,
        app,
        "--path",
        modified_charm,
    ]
    process = subprocess.run(
        refresh_command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    if process.returncode != 0:
        raise Exception(
            f"Expected juju refresh command to succeed instead it failed: {process.returncode} - {process.stderr.decode()}"
        )
    async with ops_test.fast_forward(fast_interval="30s"):
        await ops_test.model.wait_for_idle(
            apps=[app], status="blocked", idle_period=15, raise_on_blocked=False
        )


async def test_rollback(ops_test: OpsTest) -> None:
    """Refresh the application with the locally built charm and wait for it to be active.

    Runs the ``pre-upgrade-check`` action on the leader first and requires it
    to finish with code ``0``.
    """
    # Run the pre-upgrade-check action.
    logger.info("running pre-upgrade check")
    app = await app_name(ops_test)
    leader_unit_name = None
    for unit in ops_test.model.applications[app].units:
        if await unit.is_leader_from_status():
            leader_unit_name = unit.name
            break
    # Fail with a clear message rather than an AttributeError on None below.
    assert leader_unit_name is not None, f"no leader unit found for application {app}"
    action = await ops_test.model.units.get(leader_unit_name).run_action("pre-upgrade-check")
    await action.wait()
    assert action.results["Code"] == "0"

    # Run juju refresh.
    logger.info("refreshing the charm")
    application = ops_test.model.applications[app]
    charm = await ops_test.build_charm(".")
    await application.refresh(path=charm)
    async with ops_test.fast_forward(fast_interval="30s"):
        await ops_test.model.wait_for_idle(
            apps=[app], status="active", idle_period=15, raise_on_blocked=True
        )
Empty file.
11 changes: 11 additions & 0 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,17 @@ commands =
pip install juju=={env:LIBJUJU}
poetry run pytest -v --tb native --log-cli-level=INFO -s {posargs} {[vars]tests_path}/integration/test_tls.py

# Runs only the upgrade test module (integration/ha_tests/test_upgrade.py) with the libjuju version taken from LIBJUJU.
[testenv:upgrade-integration-{juju2, juju3}]
description = Run upgrade integration tests
pass_env =
{[testenv]pass_env}
CI
CI_PACKED_CHARMS
commands =
poetry install --with integration
pip install juju=={env:LIBJUJU}
poetry run pytest -v --tb native --log-cli-level=INFO -s {posargs} {[vars]tests_path}/integration/ha_tests/test_upgrade.py

[testenv:integration-{juju2, juju3}]
description = Run all integration tests
pass_env =
Expand Down
Loading