Skip to content

Commit

Permalink
[DPE-2695] + [partial DPE-2700] POC Identify primary shard + Prevent …
Browse files Browse the repository at this point in the history
…removal of Primary shards (#267)

## Issue
Primary shards are not identified; this can cause problems when a
primary shard is removed.

## Solution
Identify primary shards and prevent their removal

## Follow up PR
Move the databases that use the shard being removed as their primary
shard to a different shard (left out of this PR to reduce the complexity
of the review).

## Testing
As this is a POC, no integration tests were added; instead, tests
were performed by hand.
```
# deploy shards + config server
juju deploy ./*charm --config role="config-server" config-server-one 
juju deploy ./*charm --config role="shard" shard-one 
juju deploy ./*charm --config role="shard" shard-two 

# relate shards
juju integrate config-server-one:config-server shard-one:sharding
juju integrate config-server-one:config-server shard-two:sharding

# write data to shard-one
juju ssh config-server-one/0
charmed-mongodb.mongosh <URI>
use test_db
db.createCollection("cool_animals")
db.cool_animals.insertOne({"horses":"unicorn"})
sh.enableSharding("test_db")

# write data to shard-two
use test_db_2
db.createCollection("cool_animals")
db.cool_animals.insertOne({"horses":"unicorn"})
sh.enableSharding("test_db_2")
db.adminCommand( { movePrimary : "test_db_2", to : "shard-one" } )

# show both shards
use config 
show collections 
db.databases.find()

# exit
exit
exit

# remove the shard, then verify that the config server reports an error and the shard waits indefinitely
juju remove-relation config-server-one:config-server shard-two:sharding
juju status --watch 1s
```
  • Loading branch information
MiaAltieri authored Oct 13, 2023
1 parent 439c915 commit cef5746
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 31 deletions.
81 changes: 50 additions & 31 deletions lib/charms/mongodb/v0/mongos.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@

import logging
from dataclasses import dataclass
from typing import Dict, Optional, Set
from typing import Dict, List, Optional, Set
from urllib.parse import quote_plus

from charms.mongodb.v0.mongodb import NotReadyError
from pymongo import MongoClient
from pymongo import MongoClient, collection

from config import Config

Expand Down Expand Up @@ -174,6 +174,7 @@ def remove_shard(self, shard_name: str) -> None:
RemovePrimaryShardError
"""
sc_status = self.client.admin.command("listShards")

# It is necessary to call removeShard multiple times on a shard to guarantee removal.
# Allow re-removal of shards that are currently draining.
if self._is_any_draining(sc_status, ignore_shard=shard_name):
Expand All @@ -183,37 +184,17 @@ def remove_shard(self, shard_name: str) -> None:
logger.error(cannot_remove_shard)
raise NotReadyError(cannot_remove_shard)

# TODO Follow up PR, there is no MongoDB command to retrieve primary shard, this is
# possible with mongosh.
primary_shard = self.get_primary_shard()
if primary_shard:
# TODO Future PR, support removing Primary Shard if there are no unsharded collections
# on it. All sharded collections should perform `MovePrimary`
cannot_remove_primary_shard = (
f"Shard {shard_name} is the primary shard, cannot remove."
)
databases_using_shard_as_primary = self.get_databases_for_shard(shard_name)
if databases_using_shard_as_primary:
cannot_remove_primary_shard = f"These databases: {', '.join(databases_using_shard_as_primary)}, use Shard {shard_name} is a primary shard, cannot remove shard."
logger.error(cannot_remove_primary_shard)
raise RemovePrimaryShardError(cannot_remove_primary_shard)

logger.info("Attempting to remove shard %s", shard_name)
removal_info = self.client.admin.command("removeShard", shard_name)

# process removal status
remaining_chunks = (
removal_info["remaining"]["chunks"] if "remaining" in removal_info else "None"
)
dbs_to_move = (
removal_info["dbsToMove"]
if "dbsToMove" in removal_info and removal_info["dbsToMove"] != []
else ["None"]
)
logger.info(
"Shard %s is draining status is: %s. Remaining chunks: %s. DBs to move: %s.",
shard_name,
removal_info["state"],
str(remaining_chunks),
",".join(dbs_to_move),
)
self._log_removal_info(removal_info)

def _is_shard_draining(self, shard_name: str) -> bool:
"""Reports if a given shard is currently in the draining state.
Expand All @@ -235,11 +216,31 @@ def _is_shard_draining(self, shard_name: str) -> bool:
f"Shard {shard_name} not in cluster, could not retrieve draining status"
)

def get_primary_shard(self) -> Optional[str]:
    """Identify the primary shard of the cluster.

    Not implemented yet: this stub always reports that no primary shard was
    found, so a caller that tests the result for truthiness skips its
    primary-shard handling.

    Returns:
        The name of the primary shard; currently always None.
    """
    # TODO Follow up PR, implement this function there is no MongoDB command to retrieve
    # primary shard, this is possible with mongosh.
    return None
def get_databases_for_shard(self, primary_shard) -> Optional[List[str]]:
    """List the databases whose primary shard is the given shard.

    In Sharded MongoDB clusters, mongos selects the primary shard when creating a new database
    by picking the shard in the cluster that has the least amount of data. This means that:
    1. There can be multiple primary shards in a cluster.
    2. Until there is data written to the cluster there is effectively no primary shard.

    Args:
        primary_shard: name of the shard to look up.

    Returns:
        The distinct `_id` values of the entries in the databases collection whose `primary`
        field equals `primary_shard`, or None when that collection is not available.
    """
    collection_of_dbs = self._get_databases_collection()

    # Compare against None explicitly rather than relying on truthiness of the collection.
    if collection_of_dbs is not None:
        return collection_of_dbs.distinct("_id", {"primary": primary_shard})

    return None

def _get_databases_collection(self) -> Optional[collection.Collection]:
    """Return the `databases` collection of the `config` database, if present.

    The collection `databases` only gets created once data is written to the sharded cluster.

    Returns:
        The `databases` collection, or None when the `config` database does not list it.
    """
    config_db = self.client["config"]
    if "databases" not in config_db.list_collection_names():
        # Absence is an expected state, not an error: report it and let the caller decide.
        logger.info("No data written to sharded cluster yet.")
        return None

    return config_db["databases"]

@staticmethod
def _is_any_draining(sc_status: Dict, ignore_shard: str = "") -> bool:
Expand Down Expand Up @@ -267,3 +268,21 @@ def _hostname_from_hostport(hostname: str) -> str:
e.g. output: shard03
"""
return hostname.split("/")[0]

def _log_removal_info(self, removal_info: Dict, shard_name: Optional[str] = None) -> None:
    """Log the progress reported by a `removeShard` command.

    Args:
        removal_info: response document of `removeShard`. It must contain `state`; it may
            contain `remaining` (with a `chunks` entry) and `dbsToMove`.
        shard_name: name of the shard being removed. Optional because `remove_shard` calls
            this helper with `removal_info` only; when omitted, the `shard` entry of the
            response is used if there is one, otherwise "unknown".
    """
    if shard_name is None:
        shard_name = removal_info.get("shard", "unknown")

    remaining_chunks = (
        removal_info["remaining"]["chunks"] if "remaining" in removal_info else "None"
    )
    # A missing or empty list of databases is reported as "None".
    dbs_to_move = removal_info.get("dbsToMove") or ["None"]

    logger.info(
        "Shard %s is draining status is: %s. Remaining chunks: %s. DBs to move: %s.",
        shard_name,
        removal_info["state"],
        str(remaining_chunks),
        ",".join(dbs_to_move),
    )
7 changes: 7 additions & 0 deletions lib/charms/mongodb/v0/shards_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
)
from charms.mongodb.v0.mongos import (
MongosConnection,
RemovePrimaryShardError,
ShardNotInClusterError,
ShardNotPlannedForRemovalError,
)
Expand Down Expand Up @@ -170,6 +171,12 @@ def _on_relation_event(self, event):

logger.error("Deferring _on_relation_event for shards interface since: error=%r", e)
event.defer()
except RemovePrimaryShardError:
cannot_proceed = (
"Attempt made to remove a primary shard, do not permit other hooks to execute."
)
logger.error(cannot_proceed)
raise
except (PyMongoError, NotReadyError) as e:
logger.error("Deferring _on_relation_event for shards interface since: error=%r", e)
event.defer()
Expand Down

0 comments on commit cef5746

Please sign in to comment.