Skip to content

Commit

Permalink
Add pre-upgrade check structure (#262)
Browse files Browse the repository at this point in the history
The checks themselves are not implemented
  • Loading branch information
carlcsaposs-canonical authored Apr 30, 2024
1 parent 704e521 commit 2071f19
Show file tree
Hide file tree
Showing 5 changed files with 118 additions and 4 deletions.
3 changes: 3 additions & 0 deletions actions.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@ restore:
required:
- backup-id

pre-upgrade-check:
description: Check if charm is ready to upgrade

resume-upgrade:
description: Upgrade remaining units (after you manually verified that upgraded units are healthy).

Expand Down
44 changes: 42 additions & 2 deletions src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@ def __init__(self, *args):
self.framework.observe(
self.on[upgrade.PEER_RELATION_ENDPOINT_NAME].relation_changed, self._reconcile_upgrade
)
self.framework.observe(
self.on[upgrade.PRECHECK_ACTION_NAME].action, self._on_pre_upgrade_check_action
)
self.framework.observe(
self.on[upgrade.RESUME_ACTION_NAME].action, self._on_resume_upgrade_action
)
Expand Down Expand Up @@ -96,7 +99,15 @@ def _reconcile_upgrade(self, _=None):
self._set_upgrade_status()
return
if self._upgrade.unit_state is upgrade.UnitState.OUTDATED:
if self._upgrade.authorized:
try:
authorized = self._upgrade.authorized
except upgrade.PrecheckFailed as exception:
self._set_upgrade_status()
self.unit.status = exception.status
logger.debug(f"Set unit status to {self.unit.status}")
logger.error(exception.status.message)
return
if authorized:
self._set_upgrade_status()
self._upgrade_opensearch_event.emit()
else:
Expand All @@ -107,7 +118,12 @@ def _reconcile_upgrade(self, _=None):

def _set_upgrade_status(self):
# Set/clear upgrade unit status if no other unit status
if isinstance(self.unit.status, ops.ActiveStatus):
if isinstance(self.unit.status, ops.ActiveStatus) or (
isinstance(self.unit.status, ops.BlockedStatus)
and self.unit.status.message.startswith(
"Rollback with `juju refresh`. Pre-upgrade check failed:"
)
):
self.status.set(self._upgrade.get_unit_juju_status() or ops.ActiveStatus())
logger.debug(f"Set unit status to {self.unit.status}")
if not self.unit.is_leader():
Expand All @@ -134,6 +150,30 @@ def _on_upgrade_charm(self, _):
# `upgrade_resumed`
self._reconcile_upgrade()

def _on_pre_upgrade_check_action(self, event: ops.ActionEvent) -> None:
    """Handle the pre-upgrade-check action

    The action is failed (with the reason also logged at debug level) when:
    - `self._unit_lifecycle.authorized_leader` is falsy
    - `self._upgrade` is falsy or reports `in_progress`
    - `self._upgrade.pre_upgrade_check()` raises `upgrade.PrecheckFailed`

    Otherwise the action result is set to a "ready for upgrade" message.

    Args:
        event: Action event that is failed or given a "result" entry
    """

    def reject(reason: str) -> None:
        # Every failure path logs the reason first, then fails the action with it
        logger.debug(f"Pre-upgrade check event failed: {reason}")
        event.fail(reason)

    if not self._unit_lifecycle.authorized_leader:
        reject(
            f"Must run action on leader unit. (e.g. `juju run {self.app.name}/leader {upgrade.PRECHECK_ACTION_NAME}`)"
        )
        return

    # NOTE(review): a falsy `self._upgrade` is reported with the same message as an upgrade
    # that is in progress — confirm that this is the intended wording for that case
    if not self._upgrade or self._upgrade.in_progress:
        reject("Upgrade already in progress")
        return

    try:
        self._upgrade.pre_upgrade_check()
    except upgrade.PrecheckFailed as exception:
        reject(
            f"Charm is *not* ready for upgrade. Pre-upgrade check failed: {exception.message}"
        )
        return

    # Success: publish the result before logging it
    ready = "Charm is ready for upgrade"
    event.set_results({"result": ready})
    logger.debug(f"Pre-upgrade check event succeeded: {ready}")

def _on_resume_upgrade_action(self, event: ops.ActionEvent) -> None:
if not self._unit_lifecycle.authorized_leader:
message = f"Must run action on leader unit. (e.g. `juju run {self.app.name}/leader {upgrade.RESUME_ACTION_NAME}`)"
Expand Down
20 changes: 18 additions & 2 deletions src/machine_upgrade.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,12 +144,28 @@ def authorized(self) -> bool:
"""Whether this unit is authorized to upgrade
Only applies to machine charm
Raises:
PrecheckFailed: App is not ready to upgrade
"""
assert self._unit_workload_container_version != self._app_workload_container_version
assert self.versions_set
for index, unit in enumerate(self._sorted_units):
if unit.name == self._unit.name:
# Higher number units have already upgraded
if index == 1:
if index == 0:
if (
json.loads(self._app_databag["versions"])["charm"]
== self._current_versions["charm"]
):
# Assumes charm version uniquely identifies charm revision
logger.debug("Rollback detected. Skipping pre-upgrade check")
else:
# Run pre-upgrade check
# (in case user forgot to run pre-upgrade-check action)
self.pre_upgrade_check()
logger.debug("Pre-upgrade check after `juju refresh` successful")
elif index == 1:
# User confirmation needed to resume upgrade (i.e. upgrade second unit)
logger.debug(f"Second unit authorized to upgrade if {self.upgrade_resumed=}")
return self.upgrade_resumed
Expand All @@ -171,7 +187,7 @@ def upgrade_unit(self, *, snap: OpenSearchSnap) -> None:
Only applies to machine charm
"""
logger.debug(f"Upgrading {self.authorized=}")
logger.debug("Upgrading unit")
self.unit_state = upgrade.UnitState.UPGRADING
snap.install()
self._unit_workload_container_version = _SNAP_REVISION
Expand Down
14 changes: 14 additions & 0 deletions src/status_exception.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Copyright 2023 Canonical Ltd.
# See LICENSE file for licensing details.

"""Exception with ops status"""

import ops


class StatusException(Exception):
    """Exception that carries an ops status

    The status is kept on the `status` attribute and its `message` is used as the
    exception message.
    """

    def __init__(self, status: ops.StatusBase) -> None:
        self.status = status
        # The exception text is the message of the status
        super().__init__(self.status.message)
41 changes: 41 additions & 0 deletions src/upgrade.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,12 @@
import poetry.core.constraints.version as poetry_version
from charms.opensearch.v0.opensearch_distro import OpenSearchDistribution

import status_exception

logger = logging.getLogger(__name__)

PEER_RELATION_ENDPOINT_NAME = "upgrade-version-a"
PRECHECK_ACTION_NAME = "pre-upgrade-check"
RESUME_ACTION_NAME = "resume-upgrade"


Expand All @@ -34,6 +37,18 @@ class PeerRelationNotReady(Exception):
"""Upgrade peer relation not available (to this unit)"""


class PrecheckFailed(status_exception.StatusException):
    """Raised when the app is not ready to upgrade

    The bare failure reason is kept on the `message` attribute. The status passed to the
    parent class is an `ops.BlockedStatus` whose message prefixes that reason with rollback
    instructions.
    """

    def __init__(self, message: str):
        self.message = message
        # The charm's `_set_upgrade_status` matches on the start of this status message
        # (`startswith`), so the prefix must stay in sync with that check
        blocked = ops.BlockedStatus(
            f"Rollback with `juju refresh`. Pre-upgrade check failed: {self.message}"
        )
        super().__init__(blocked)


class UnitState(str, enum.Enum):
"""Unit upgrade state"""

Expand Down Expand Up @@ -225,3 +240,29 @@ def upgrade_unit(self, *, snap: OpenSearchDistribution) -> None:
Only applies to machine charm
"""

def pre_upgrade_check(self) -> None:
    """Check if this app is ready to upgrade

    When this runs:
    - Before any units are upgraded
    - *Not* during rollback
    - On machines: before any units are upgraded (after `juju refresh`)
    - On machines & Kubernetes: also during the pre-upgrade-check action

    Can run on leader or non-leader unit

    Raises:
        PrecheckFailed: App is not ready to upgrade

    TODO Kubernetes: Run (some) checks after `juju refresh` (in case user forgets to run
    pre-upgrade-check action). Note: 1 unit will upgrade before we can run checks (checks
    may need to be modified).
    See https://chat.canonical.com/canonical/pl/cmf6uhm1rp8b7k8gkjkdsj4mya
    """
    # No checks exist yet: for now this only logs that the checks were requested
    logger.debug("Running pre-upgrade checks")
    # TODO: implement checks
    # e.g.
    # if health != green:
    #     raise PrecheckFailed("Cluster is not healthy")

0 comments on commit 2071f19

Please sign in to comment.