Skip to content

Commit

Permalink
Improve statuses
Browse files Browse the repository at this point in the history
Signed-off-by: Marcelo Henrique Neppel <[email protected]>
  • Loading branch information
marceloneppel committed May 31, 2024
1 parent 7ac4f3f commit 02a584c
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 28 deletions.
2 changes: 1 addition & 1 deletion src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -1357,7 +1357,7 @@ def _set_primary_status_message(self) -> None:
if self._patroni.get_primary(unit_name_pattern=True) == self.unit.name:
self.unit.status = ActiveStatus("Primary")
elif self.is_standby_leader:
self.unit.status = ActiveStatus("Standby Leader")
self.unit.status = ActiveStatus("Standby")
elif self._patroni.member_started:
self.unit.status = ActiveStatus()
except (RetryError, ConnectionError) as e:
Expand Down
51 changes: 32 additions & 19 deletions src/relations/async_replication.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@

from ops import (
ActionEvent,
ActiveStatus,
Application,
BlockedStatus,
MaintenanceStatus,
Expand All @@ -52,7 +53,7 @@
logger = logging.getLogger(__name__)


READ_ONLY_MODE_BLOCKING_MESSAGE = "Cluster in read-only mode"
READ_ONLY_MODE_BLOCKING_MESSAGE = "Standalone read-only cluster"
REPLICATION_CONSUMER_RELATION = "replication"
REPLICATION_OFFER_RELATION = "replication-offer"
SECRET_LABEL = "async-replication-secret"
Expand Down Expand Up @@ -124,13 +125,11 @@ def _can_promote_cluster(self, event: ActionEvent) -> bool:
if standby_leader is not None:
try:
self.charm._patroni.promote_standby_cluster()
if (
self.charm.is_blocked
and self.charm.unit.status.message == READ_ONLY_MODE_BLOCKING_MESSAGE
):
if self.charm.app.status.message == READ_ONLY_MODE_BLOCKING_MESSAGE:
self.charm._peers.data[self.charm.app].update({
"promoted-cluster-counter": ""
})
self._set_app_status()
self.charm._set_primary_status_message()
except (StandbyClusterAlreadyPromotedError, ClusterNotPromotedError) as e:
event.fail(str(e))
Expand Down Expand Up @@ -426,19 +425,11 @@ def _handle_forceful_promotion(self, event: ActionEvent) -> bool:

def handle_read_only_mode(self) -> None:
"""Handle read-only mode (standby cluster that lost the relation with the primary cluster)."""
promoted_cluster_counter = self.charm._peers.data[self.charm.app].get(
"promoted-cluster-counter", ""
)
if not self.charm.is_blocked or (
promoted_cluster_counter != "0"
and self.charm.unit.status.message == READ_ONLY_MODE_BLOCKING_MESSAGE
):
if not self.charm.is_blocked:
self.charm._set_primary_status_message()
if (
promoted_cluster_counter == "0"
and self.charm.unit.status.message != READ_ONLY_MODE_BLOCKING_MESSAGE
):
self.charm.unit.status = BlockedStatus(READ_ONLY_MODE_BLOCKING_MESSAGE)

if self.charm.unit.is_leader():
self._set_app_status()

def _handle_replication_change(self, event: ActionEvent) -> bool:
if not self._can_promote_cluster(event):
Expand Down Expand Up @@ -504,14 +495,17 @@ def _on_async_relation_broken(self, _) -> None:
if self.charm._patroni.get_standby_leader() is not None:
if self.charm.unit.is_leader():
self.charm._peers.data[self.charm.app].update({"promoted-cluster-counter": "0"})
self.charm.unit.status = BlockedStatus(READ_ONLY_MODE_BLOCKING_MESSAGE)
self._set_app_status()
else:
if self.charm.unit.is_leader():
self.charm._peers.data[self.charm.app].update({"promoted-cluster-counter": ""})
self.charm.update_config()

def _on_async_relation_changed(self, event: RelationChangedEvent) -> None:
"""Update the Patroni configuration if one of the clusters was already promoted."""
if self.charm.unit.is_leader():
self._set_app_status()

primary_cluster = self._get_primary_cluster()
logger.debug("Primary cluster: %s", primary_cluster)
if primary_cluster is None:
Expand Down Expand Up @@ -590,7 +584,10 @@ def _on_create_replication(self, event: ActionEvent) -> None:

def _on_promote_to_primary(self, event: ActionEvent) -> None:
"""Promote this cluster to the primary cluster."""
if self._get_primary_cluster() is None:
if (
self.charm.app.status.message != READ_ONLY_MODE_BLOCKING_MESSAGE
and self._get_primary_cluster() is None
):
event.fail(
"No primary cluster found. Run `create-replication` action in the cluster where the offer was created."
)
Expand Down Expand Up @@ -674,6 +671,22 @@ def _relation(self) -> Relation:
if relation is not None:
return relation

def _set_app_status(self) -> None:
    """Derive and publish the application-level status of this cluster.

    The status is chosen from the following cases, checked in order:

    * ``promoted-cluster-counter`` in the peer application databag is
      ``"0"``: blocked with ``READ_ONLY_MODE_BLOCKING_MESSAGE``.
    * ``self._relation`` is ``None``: active, without a message.
    * ``self._get_primary_cluster()`` returns ``None``: active, without
      a message.
    * Otherwise: active with ``"Primary"`` when this application is the
      primary cluster, or ``"Standby"`` when it is not.
    """
    charm = self.charm
    counter = charm._peers.data[charm.app].get("promoted-cluster-counter")

    if counter == "0":
        charm.app.status = BlockedStatus(READ_ONLY_MODE_BLOCKING_MESSAGE)
    elif self._relation is None:
        charm.app.status = ActiveStatus()
    else:
        # The primary cluster is only looked up once a relation exists.
        leading_app = self._get_primary_cluster()
        if leading_app is not None:
            role = "Primary" if charm.app == leading_app else "Standby"
            charm.app.status = ActiveStatus(role)
        else:
            charm.app.status = ActiveStatus()

def _stop_database(self, event: RelationChangedEvent) -> bool:
"""Stop the database."""
if (
Expand Down
10 changes: 5 additions & 5 deletions tests/integration/ha_tests/test_async_replication.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,10 +288,10 @@ async def test_promote_standby(
async with ops_test.fast_forward(FAST_INTERVAL), fast_forward(second_model, FAST_INTERVAL):
await gather(
first_model.wait_for_idle(
apps=[DATABASE_APP_NAME],
status="blocked",
idle_period=IDLE_PERIOD,
timeout=TIMEOUT,
apps=[DATABASE_APP_NAME], idle_period=IDLE_PERIOD, timeout=TIMEOUT
),
first_model.block_until(
lambda: first_model.applications[DATABASE_APP_NAME].status == "blocked",
),
second_model.wait_for_idle(
apps=[DATABASE_APP_NAME], status="active", idle_period=IDLE_PERIOD, timeout=TIMEOUT
Expand Down Expand Up @@ -378,7 +378,7 @@ async def test_reestablish_relation(
leader_unit = await get_leader_unit(ops_test, DATABASE_APP_NAME)
assert leader_unit is not None, "No leader unit found"
logger.info("promoting the first cluster")
run_action = await leader_unit.run_action("promote-to-primary")
run_action = await leader_unit.run_action("create-replication")
await run_action.wait()
assert (run_action.results.get("return-code", None) == 0) or (
run_action.results.get("Code", None) == "0"
Expand Down
4 changes: 1 addition & 3 deletions tests/unit/test_charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -2295,9 +2295,7 @@ def test_set_active_status(self, _get_primary, _is_standby_leader, _member_start
self.charm.unit.status.message,
"Primary"
if values[0] == self.charm.unit.name
else (
"Standby Leader" if values[1] else ("" if values[2] else "fake status")
),
else ("Standby" if values[1] else ("" if values[2] else "fake status")),
)
else:
_get_primary.side_effect = values[0]
Expand Down

0 comments on commit 02a584c

Please sign in to comment.