Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DPE-2763] POC Status reporting shard side #275

Merged
merged 12 commits into from
Oct 27, 2023
37 changes: 25 additions & 12 deletions lib/charms/mongodb/v1/mongodb_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@

# Increment this PATCH version before using `charmcraft publish-lib` or reset
# to 0 if you are raising the major API version
LIBPATCH = 1
LIBPATCH = 2

logger = logging.getLogger(__name__)
REL_NAME = "database"
Expand Down Expand Up @@ -82,6 +82,28 @@ def __init__(self, charm: CharmBase, substrate="k8s", relation_name: str = "data
self.database_provides.on.database_requested, self._on_relation_event
)

def pass_hook_checks(self) -> bool:
    """Run the pre-hook checks for MongoDBProvider.

    Returns:
        True only if the relation is feasible, no legacy relation exists, this unit is the
        leader and the database is initialised; False otherwise.
    """
    if not self.charm.is_relation_feasible(self.relation_name):
        logger.info("Skipping code for relations.")
        return False

    # New relations require auth, while legacy relations have auth disabled, so the two
    # kinds of relation cannot coexist.
    legacy_relation = self.model.get_relation(LEGACY_REL_NAME)
    if legacy_relation:
        self.charm.unit.status = BlockedStatus("cannot have both legacy and new relations")
        logger.error("Auth disabled due to existing connections to legacy relations")
        return False

    # Only the leader continues, and only once the database is initialised: users are not
    # created or updated before that point, they are created as part of initialisation.
    return bool(self.charm.unit.is_leader() and self.charm.db_initialised)

def _on_relation_event(self, event):
"""Handle relation joined events.

Expand All @@ -90,17 +112,8 @@ def _on_relation_event(self, event):
data. As a result, related charm gets credentials for accessing the
MongoDB database.
"""
if not self.charm.unit.is_leader():
return
# We shouldn't try to create or update users if the database is not
# initialised. We will create users as part of initialisation.
if "db_initialised" not in self.charm.app_peer_data:
return

# legacy relations have auth disabled, which new relations require
if self.model.get_relation(LEGACY_REL_NAME):
self.charm.unit.status = BlockedStatus("cannot have both legacy and new relations")
logger.error("Auth disabled due to existing connections to legacy relations")
if not self.pass_hook_checks():
logger.info("Skipping %s: hook checks did not pass", type(event))
return

# If auth is disabled but there are no legacy relation users, this means that legacy
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,11 @@
LIBID = "896a48bc89b84d30839335bb37170509"

# Increment this major API version when introducing breaking changes
LIBAPI = 0
LIBAPI = 1

# Increment this PATCH version before using `charmcraft publish-lib` or reset
# to 0 if you are raising the major API version
LIBPATCH = 4
LIBPATCH = 0
logger = logging.getLogger(__name__)
REL_NAME = "database"

Expand All @@ -41,6 +41,7 @@ def __init__(self, charm):
"""Manager of MongoDB client relations."""
super().__init__(charm, "client-relations")
self.charm = charm
self.relation_name = LEGACY_REL_NAME
self.framework.observe(
self.charm.on[LEGACY_REL_NAME].relation_created, self._on_legacy_relation_created
)
Expand All @@ -64,6 +65,10 @@ def _on_legacy_relation_created(self, event):
)
return

if not self.charm.is_relation_feasible(self.relation_name):
logger.info("Skipping code for legacy relations.")
return

# If auth is already disabled its likely it has a connection with another legacy relation
# user. Shutting down and restarting mongod would lead to downtime for the other legacy
# relation user and hence shouldn't be done. Not to mention there is no need to disable
Expand Down
34 changes: 32 additions & 2 deletions lib/charms/mongodb/v1/mongos.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

from charms.mongodb.v0.mongodb import NotReadyError
from pymongo import MongoClient, collection
from tenacity import Retrying, stop_after_delay, wait_fixed
from tenacity import RetryError, Retrying, stop_after_delay, wait_fixed

from config import Config

Expand All @@ -21,7 +21,7 @@

# Increment this PATCH version before using `charmcraft publish-lib` or reset
# to 0 if you are raising the major API version
LIBPATCH = 1
LIBPATCH = 2

# path to store mongodb keyFile
logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -334,6 +334,36 @@ def _log_removal_info(self, removal_info, shard_name):
",".join(dbs_to_move),
)

@property
def is_ready(self) -> bool:
    """Report whether mongos is ready to service requests.

    A `ping` is attempted with a fixed 3 second wait between attempts, for up to 60
    seconds, to allow the server time to start up.

    Returns:
        True if the retry loop completes without a RetryError, False if a RetryError is
        raised by the retry loop.
    """
    ping_attempts = Retrying(stop=stop_after_delay(60), wait=wait_fixed(3))
    try:
        for attempt in ping_attempts:
            with attempt:
                # The ping command is cheap and does not require auth.
                self.client.admin.command("ping")
    except RetryError:
        return False
    else:
        return True

def is_shard_aware(self, shard_name: str) -> bool:
    """Tell whether the named shard is listed by mongos with state 1.

    Args:
        shard_name: value compared against the `_id` of each entry returned by the
            `listShards` command.

    Returns:
        True if the first entry whose `_id` equals `shard_name` has a `state` of 1; False
        if its state differs or if no entry matches.
    """
    listed_shards = self.client.admin.command("listShards")["shards"]
    matching = next((entry for entry in listed_shards if entry["_id"] == shard_name), None)
    if matching is None:
        return False

    return matching["state"] == 1

def _retrieve_remaining_chunks(self, removal_info) -> int:
"""Parses the remaining chunks to remove from removeShard command."""
return removal_info["remaining"]["chunks"] if "remaining" in removal_info else 0
Expand Down
126 changes: 108 additions & 18 deletions lib/charms/mongodb/v1/shards_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
PyMongoError,
)
from charms.mongodb.v1.helpers import KEY_FILE
from charms.mongodb.v1.mongodb_provider import LEGACY_REL_NAME, REL_NAME
from charms.mongodb.v1.mongos import (
BalancerNotEnabledError,
MongosConnection,
Expand All @@ -28,7 +29,13 @@
from charms.mongodb.v1.users import MongoDBUser, OperatorUser
from ops.charm import CharmBase, EventBase, RelationBrokenEvent
from ops.framework import Object
from ops.model import ActiveStatus, BlockedStatus, MaintenanceStatus, WaitingStatus
from ops.model import (
ActiveStatus,
BlockedStatus,
MaintenanceStatus,
StatusBase,
WaitingStatus,
)
from tenacity import RetryError, Retrying, stop_after_delay, wait_fixed

from config import Config
Expand All @@ -44,7 +51,7 @@

# Increment this PATCH version before using `charmcraft publish-lib` or reset
# to 0 if you are raising the major API version
LIBPATCH = 1
LIBPATCH = 2
KEYFILE_KEY = "key-file"
HOSTS_KEY = "host"
OPERATOR_PASSWORD_KEY = MongoDBUser.get_password_key_name_for_user(OperatorUser.get_username())
Expand Down Expand Up @@ -106,12 +113,8 @@ def _on_relation_joined(self, event):

def pass_hook_checks(self, event: EventBase) -> bool:
"""Runs the pre-hooks checks for ShardingProvider, returns True if all pass."""
if self.charm.is_role(Config.Role.REPLICATION):
self.charm.unit.status = BlockedStatus("role replication does not support sharding")
logger.error(
"Skipping %s. Sharding interface not supported with config role=replication.",
type(event),
)
if not self.charm.is_relation_feasible(self.relation_name):
logger.info("Skipping event %s , relation not feasible.", type(event))
return False

if not self.charm.is_role(Config.Role.CONFIG_SERVER):
Expand Down Expand Up @@ -268,6 +271,10 @@ def update_mongos_hosts(self):
for relation in self.charm.model.relations[self.relation_name]:
self._update_relation_data(relation.id, {HOSTS_KEY: json.dumps(self.charm._unit_ips)})

def get_config_server_status(self):
    """Placeholder for config-server status reporting; currently always returns None.

    TODO: Implement this function in a separate PR.
    """
    return None

def _update_relation_data(self, relation_id: int, data: dict) -> None:
"""Updates a set of key-value pairs in the relation.

Expand Down Expand Up @@ -349,6 +356,10 @@ def _on_relation_changed(self, event):
logger.info("Skipping relation joined event: hook checks re not passed")
return

# if re-using an old shard, re-set drained flag.
self.charm.app_peer_data["drained"] = json.dumps(False)
MiaAltieri marked this conversation as resolved.
Show resolved Hide resolved
self.charm.unit.status = MaintenanceStatus("Adding shard to config-server")

# shards rely on the config server for secrets
relation_data = event.relation.data[event.app]
self.update_keyfile(key_file_contents=relation_data.get(KEYFILE_KEY))
Expand All @@ -361,8 +372,6 @@ def _on_relation_changed(self, event):
event.defer()
return

self.charm.unit.status = MaintenanceStatus("Adding shard to config-server")

if not self.charm.unit.is_leader():
return

Expand All @@ -377,13 +386,12 @@ def _on_relation_changed(self, event):
)
return

# TODO future PR, leader unit verifies shard was added to cluster (update-status hook)
self.charm.app_peer_data["added_to_cluster"] = json.dumps(True)

def pass_hook_checks(self, event):
"""Runs the pre-hooks checks for ConfigServerRequirer, returns True if all pass."""
if self.charm.is_role(Config.Role.REPLICATION):
self.charm.unit.status = BlockedStatus("role replication does not support sharding")
logger.error("sharding interface not supported with config role=replication")
if not self.charm.is_relation_feasible(self.relation_name):
logger.info("Skipping event %s , relation not feasible.", type(event))
return False

if not self.charm.is_role(Config.Role.SHARD):
Expand Down Expand Up @@ -426,8 +434,9 @@ def _on_relation_broken(self, event: RelationBrokenEvent) -> None:
self.wait_for_draining(mongos_hosts)

self.charm.unit.status = ActiveStatus("Shard drained from cluster, ready for removal")
# TODO future PR, leader unit displays this message in update-status hook
# TODO future PR, check for shard drainage when removing application

if self.charm.unit.is_leader():
self.charm.app_peer_data["added_to_cluster"] = json.dumps(False)

def wait_for_draining(self, mongos_hosts: List[str]):
"""Waits for shards to be drained from sharded cluster."""
Expand All @@ -438,6 +447,7 @@ def wait_for_draining(self, mongos_hosts: List[str]):
# no need to continuously check and abuse resources while shard is draining
time.sleep(10)
drained = self.drained(mongos_hosts, self.charm.app.name)
self.charm.unit.status = MaintenanceStatus("Draining shard from cluster")
draining_status = (
"Shard is still draining" if not drained else "Shard is fully drained."
)
Expand All @@ -459,6 +469,44 @@ def wait_for_draining(self, mongos_hosts: List[str]):

break

def get_shard_status(self) -> Optional[StatusBase]:
    """Work out the status this shard should currently report.

    Draining is not reported here, since the draining check blocks other hooks from
    executing while it runs.

    Returns:
        None when the charm is not running as a shard or the database is not yet
        initialised; otherwise the status matching the first condition below that applies.
    """
    charm = self.charm
    if not charm.is_role(Config.Role.SHARD):
        logger.info("skipping status check, charm is not running as a shard")
        return None

    if not charm.db_initialised:
        logger.info("No status for shard to report, waiting for db to be initialised.")
        return None

    # Client relations (legacy or new) to a shard are reported as unsupported.
    if self.model.get_relation(LEGACY_REL_NAME):
        return BlockedStatus(f"relation {LEGACY_REL_NAME} to shard not supported.")

    if self.model.get_relation(REL_NAME):
        return BlockedStatus(f"relation {REL_NAME} to shard not supported.")

    # Without a config-server relation the shard is either drained or still waiting to be
    # related.
    if not self.model.get_relation(self.relation_name):
        if charm.drained:
            return ActiveStatus("Shard drained from cluster, ready for removal")
        return BlockedStatus("missing relation to config server")

    if not self._is_mongos_reachable():
        return BlockedStatus("Config server unreachable")

    if not self._is_added_to_cluster():
        return MaintenanceStatus("Adding shard to config-server")

    if not self._is_shard_aware():
        return BlockedStatus("Shard is not yet shard aware")

    config_server_name = self.get_related_config_server()
    return ActiveStatus(f"Shard connected to config-server: {config_server_name}")

def drained(self, mongos_hosts: Set[str], shard_name: str) -> bool:
"""Returns whether a shard has been drained from the cluster.

Expand Down Expand Up @@ -564,16 +612,58 @@ def _update_relation_data(self, relation_id: int, data: dict) -> None:
if relation:
relation.data[self.charm.model.app].update(data)

def _is_mongos_reachable(self) -> bool:
"""Returns True if mongos is reachable."""
if not self.model.get_relation(self.relation_name):
logger.info("Mongos is not reachable, no relation to config-sever")
return False

mongos_hosts = self.get_mongos_hosts()
if not mongos_hosts:
return False

self.charm.remote_mongos_config(set(mongos_hosts))
config = self.charm.remote_mongos_config(set(mongos_hosts))

# use a URI that is not dependent on the operator password, as we are not guaranteed that
# the shard has received the password yet.
uri = f"mongodb://{','.join(mongos_hosts)}"
with MongosConnection(config, uri) as mongo:
return mongo.is_ready

def _is_added_to_cluster(self) -> bool:
"""Returns True if the shard has been added to the cluster."""
return json.loads(self.charm.app_peer_data.get("added_to_cluster", "False"))

def _is_shard_aware(self) -> bool:
"""Returns True if shard is in cluster and shard aware."""
if not self.model.get_relation(self.relation_name):
logger.info(
"Mongos is not reachable, no relation to config-sever, cannot check shard status."
)
return False

mongos_hosts = self.get_mongos_hosts()
with MongosConnection(self.charm.remote_mongos_config(set(mongos_hosts))) as mongo:
return mongo.is_shard_aware(shard_name=self.charm.app.name)

def has_config_server(self) -> bool:
    """Return True if at least one relation exists under this object's relation name."""
    config_server_relations = self.charm.model.relations[self.relation_name]
    return bool(config_server_relations)

def get_related_config_server(self) -> Optional[str]:
    """Return the name of the related config server application.

    Returns:
        The application name on the first relation under this object's relation name, or
        None when the relation name is unknown or no such relation currently exists.
    """
    relations = self.charm.model.relations
    # An empty list of relations must be treated like a missing one, otherwise indexing [0]
    # raises IndexError.
    if self.relation_name not in relations or not relations[self.relation_name]:
        return None

    # metadata.yaml prevents having multiple config servers
    return relations[self.relation_name][0].app.name

def get_mongos_hosts(self) -> Optional[List[str]]:
    """Return the mongos hosts shared by the related config-server.

    Returns:
        The list decoded from the JSON stored under HOSTS_KEY in the config-server's
        application relation data, or None when there is no config-server relation or the
        hosts have not been shared yet.
    """
    relations = self.charm.model.relations[self.relation_name]
    if not relations:
        return None

    # only one related config-server is possible
    config_server_relation = relations[0]
    hosts = config_server_relation.data[config_server_relation.app].get(HOSTS_KEY)
    if hosts is None:
        return None

    return json.loads(hosts)
Loading
Loading