Skip to content

Commit

Permalink
PR comments
Browse files Browse the repository at this point in the history
  • Loading branch information
PietroPasotti committed Nov 11, 2024
1 parent 8fea869 commit f054034
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 23 deletions.
13 changes: 2 additions & 11 deletions src/cosl/coordinated_workers/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
import ops
import pydantic
import yaml
from ops import EventSource, ModelError, Object, ObjectEvents, RelationCreatedEvent
from ops import EventSource, Object, ObjectEvents, RelationCreatedEvent
from pydantic import ConfigDict
from typing_extensions import TypedDict

Expand Down Expand Up @@ -523,16 +523,7 @@ def publish_app_roles(self, roles: Iterable[str]):
relation = self.relation
if relation:
databag_model = ClusterRequirerAppData(role=",".join(roles))
try:
databag_model.dump(relation.data[self.model.app])
except ModelError as e:
if "ERROR permission denied (unauthorized access)" in e.args:
# if we are handling an event prior to 'start', we could be denied write access
# hopefully we can ignore it and trust that it will be retried at the next occasion
log.debug(
"databag dump gave a permission denied error. "
"This could be a transient issue."
)
databag_model.dump(relation.data[self.model.app])

def _get_data_from_coordinator(self) -> Optional[ClusterProviderAppData]:
"""Fetch the contents of the doordinator databag."""
Expand Down
37 changes: 25 additions & 12 deletions src/cosl/coordinated_workers/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import tenacity
import yaml
from ops import MaintenanceStatus, StatusBase
from ops.model import ActiveStatus, BlockedStatus, WaitingStatus
from ops.model import ActiveStatus, BlockedStatus, ModelError, WaitingStatus
from ops.pebble import Check, Layer, PathError, Plan, ProtocolError

from cosl import JujuTopology
Expand Down Expand Up @@ -259,19 +259,20 @@ def status(self) -> ServiceEndpointStatus:
# we really don't want this code to raise errors, so we blanket catch all.
try:
services = self._container.get_services(*layer.services.keys())
if services:
# avoid vacuous quantification
running_status = {name: svc.is_running() for name, svc in services.items()}

some_stopped = not all(running_status.values())
if some_stopped:
stopped = tuple(
name for name, running in running_status.items() if not running
if not services:
logger.debug("No services found in pebble plan.")
else:
services_not_running = [
name for name, svc in services.items() if not svc.is_running()
]
if services_not_running:
logger.debug(
f"Some services which should be running are not: {services_not_running}."
)
logger.info(f"Some services which should be running are not: {stopped}.")
return ServiceEndpointStatus.down
else:
logger.debug("All pebble services up.")

logger.info("All pebble services up.")
# so far as pebble knows all services are up, now let's see if
# the readiness endpoint confirm that
return self.check_readiness()
Expand Down Expand Up @@ -507,7 +508,19 @@ def _update_cluster_relation(self) -> None:
self.cluster.publish_unit_address(socket.getfqdn())
if self._charm.unit.is_leader() and self.roles:
logger.info(f"publishing roles: {self.roles}")
self.cluster.publish_app_roles(self.roles)
try:
self.cluster.publish_app_roles(self.roles)
except ModelError as e:
# if we are handling an event prior to 'start', we could be denied write access
# hopefully we can ignore it and trust that it will be retried at the next occasion
if "ERROR permission denied (unauthorized access)" in e.args:
logger.debug(
"relation-set failed with a permission denied error. "
"This could be a transient issue."
)
else:
# let it burn, we clearly don't know what's going on
raise

def _running_worker_config(self) -> Optional[Dict[str, Any]]:
"""Return the worker config as dict, or None if retrieval failed."""
Expand Down

0 comments on commit f054034

Please sign in to comment.