diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 6babe86dc..d69bf3ad0 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -16,16 +16,7 @@ on: jobs: lint: name: Lint - runs-on: ubuntu-latest - timeout-minutes: 5 - steps: - - name: Checkout - uses: actions/checkout@v3 - - name: Install tox - # TODO: Consider replacing with custom image on self-hosted runner OR pinning version - run: python3 -m pip install tox - - name: Run linters - run: tox run -e lint + uses: canonical/data-platform-workflows/.github/workflows/lint.yaml@v12.4.1 unit-test: name: Unit tests @@ -39,6 +30,7 @@ jobs: run: python3 -m pip install tox - name: Run tests run: tox run -e unit + lib-check: name: Check libraries runs-on: ubuntu-latest @@ -54,6 +46,36 @@ jobs: credentials: "${{ secrets.CHARMHUB_TOKEN }}" # FIXME: current token will expire in 2023-07-04 github-token: "${{ secrets.GITHUB_TOKEN }}" + check-terraform: + name: Check Terraform + runs-on: ubuntu-latest + defaults: + run: + working-directory: ./terraform + + steps: + - name: Checkout Repository + uses: actions/checkout@v4 + + - name: Setup Terraform + uses: hashicorp/setup-terraform@v3 + + - name: Initialize Terraform Module + run: terraform init + + - name: Validate Terraform Module + run: terraform validate -no-color + + - name: Validate terraform fmt + run: | + set +e + terraform fmt -recursive -check -diff + FMT_STATUS="$?" + if [[ "$FMT_STATUS" -ne 0 ]]; then + echo "❌ terraform fmt failed" >> "$GITHUB_STEP_SUMMARY" + fi + exit "$FMT_STATUS" + build: strategy: matrix: diff --git a/.gitignore b/.gitignore index 6c5bfdde6..e84887dfd 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,54 @@ -venv/ -build/ -*.charm -.tox/ +*.idea .vscode/ .coverage +.tox/ +venv/ +build/ + +# Python +**/venv/** +*.pyc +.python-version +.mypy_cache/ __pycache__/ *.py[cod] + +# Charmcraft +*.charm + +# Local .terraform directories +**/.terraform/* + +# .tfstate files +*.tfstate +*.tfstate.* + +# Crash log files +crash.log +crash.*.log + +# Exclude all .tfvars files, which are likely to contain sensitive data, such as +# password, private keys, and other secrets. These should not be part of version +# control as they are data points which are potentially sensitive and subject +# to change depending on the environment. +*.tfvars +*.tfvars.json + +# Ignore override files as they are usually used to override resources locally and so +# are not checked in +override.tf +override.tf.json +*_override.tf +*_override.tf.json + +# Include override files you do wish to add to version control using negated pattern +# !example_override.tf + +# Include tfplan files to ignore the plan output of command: terraform plan -out=tfplan +# example: *tfplan* + +# Ignore CLI configuration files +.terraformrc +terraform.rc +.terraform.lock.hcl + diff --git a/README.md b/README.md index 416e22a46..b31dde252 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,7 @@ # Charmed MongoDB Operator +[![Charmhub](https://charmhub.io/mongodb/badge.svg)](https://charmhub.io/mongodb) +[![Release](https://github.com/canonical/mongodb-operator/actions/workflows/release.yaml/badge.svg)](https://github.com/canonical/mongodb-operator/actions/workflows/release.yaml) +[![Tests](https://github.com/canonical/mongodb-operator/actions/workflows/ci.yaml/badge.svg?branch=main)](https://github.com/canonical/mongodb-operator/actions/workflows/ci.yaml) ## Overview diff --git a/lib/charms/mongodb/v1/shards_interface.py b/lib/charms/mongodb/v1/shards_interface.py index 4669974ff..824f56a18 100644 --- a/lib/charms/mongodb/v1/shards_interface.py +++ b/lib/charms/mongodb/v1/shards_interface.py @@ -55,7 +55,7 @@ # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 7 +LIBPATCH = 8 KEYFILE_KEY = "key-file" HOSTS_KEY = "host" OPERATOR_PASSWORD_KEY = MongoDBUser.get_password_key_name_for_user(OperatorUser.get_username()) @@ -363,10 +363,7 @@ def _update_relation_data(self, relation_id: int, data: dict) -> None: data: dict containing the key-value pairs that should be updated in the relation. """ - if self.charm.unit.is_leader(): - relation = self.charm.model.get_relation(self.relation_name, relation_id) - if relation: - relation.data[self.charm.model.app].update(data) + self.database_provides.update_relation_data(relation_id, data) def _get_shards_from_relations(self, departed_shard_id: Optional[int]): """Returns a list of the shards related to the config-server.""" @@ -442,6 +439,27 @@ def get_draining_shards(self) -> List[str]: return draining_shards + def cluster_password_synced(self) -> bool: + """Returns True if the cluster password is synced.""" + # base case: not config-server + if not self.charm.is_role(Config.Role.CONFIG_SERVER): + return True + + # base case: no cluster relation + if not self.model.relations[self.relation_name]: + return True + + try: + # check our ability to use connect to cluster + with MongosConnection(self.charm.mongos_config) as mongos: + mongos.get_shard_members() + except OperationFailure as e: + if e.code == 18: # Unauthorized Error - i.e. password is not in sync + return False + raise + + return True + class ConfigServerRequirer(Object): """Manage relations between the config server and the shard, on the shard's side.""" @@ -466,6 +484,10 @@ def __init__( charm.on[self.relation_name].relation_changed, self._on_relation_changed ) + self.framework.observe( + getattr(self.charm.on, "secret_changed"), self._handle_changed_secrets + ) + self.framework.observe( charm.on[self.relation_name].relation_departed, self.charm.check_relation_broken_or_scale_down, @@ -475,6 +497,50 @@ def __init__( charm.on[self.relation_name].relation_broken, self._on_relation_broken ) + def _handle_changed_secrets(self, event) -> None: + """Update operator and backup user passwords when rotation occurs. + + Changes in secrets do not re-trigger a relation changed event, so it is necessary to listen + to secret changes events. + """ + if ( + not self.charm.unit.is_leader() + or not event.secret.label + or not self.model.get_relation(self.relation_name) + ): + return + + config_server_relation = self.model.get_relation(self.relation_name) + + # many secret changed events occur, only listen to those related to our interface with the + # config-server + secret_changing_label = event.secret.label + sharding_secretes_label = f"{self.relation_name}.{config_server_relation.id}.extra.secret" + if secret_changing_label != sharding_secretes_label: + logger.info( + "A secret unrelated to this sharding relation %s is changing, igorning secret changed event.", + str(config_server_relation.id), + ) + return + + operator_password = self.database_requires.fetch_relation_field( + config_server_relation.id, OPERATOR_PASSWORD_KEY + ) + backup_password = self.database_requires.fetch_relation_field( + config_server_relation.id, BACKUP_PASSWORD_KEY + ) + + try: + self.update_password( + username=OperatorUser.get_username(), new_password=operator_password + ) + self.update_password(BackupUser.get_username(), new_password=backup_password) + except RetryError: + self.charm.unit.status = BlockedStatus("Failed to rotate cluster secrets") + logger.error("Shard failed to rotate cluster secrets.") + event.defer() + return + def _on_relation_changed(self, event): """Retrieves secrets from config-server and updates them within the shard.""" if not self.pass_hook_checks(event): @@ -483,6 +549,8 @@ def _on_relation_changed(self, event): # if re-using an old shard, re-set drained flag. self.charm.unit_peer_data["drained"] = json.dumps(False) + + # TODO: Future PR better status message behavior self.charm.unit.status = MaintenanceStatus("Adding shard to config-server") # shards rely on the config server for secrets @@ -773,10 +841,7 @@ def _update_relation_data(self, relation_id: int, data: dict) -> None: data: dict containing the key-value pairs that should be updated in the relation. """ - if self.charm.unit.is_leader(): - relation = self.charm.model.get_relation(self.relation_name, relation_id) - if relation: - relation.data[self.charm.model.app].update(data) + self.database_requires.update_relation_data(relation_id, data) def _is_mongos_reachable(self) -> bool: """Returns True if mongos is reachable.""" @@ -809,6 +874,37 @@ def _is_added_to_cluster(self) -> bool: raise + def cluster_password_synced(self) -> bool: + """Returns True if the cluster password is synced for the shard.""" + # base case: not a shard + if not self.charm.is_role(Config.Role.SHARD): + return True + + # base case: no cluster relation + if not self.model.get_relation(self.relation_name): + return True + + try: + # check our ability to use connect to both mongos and our current replica set. + mongos_reachable = self._is_mongos_reachable() + with MongoDBConnection(self.charm.mongodb_config) as mongo: + mongod_reachable = mongo.is_ready + except OperationFailure as e: + if e.code == 18: # Unauthorized Error - i.e. password is not in sync + return False + raise + + return mongos_reachable and mongod_reachable + + def get_shard_members(self) -> List[str]: + """Returns a list of shard members. + + Raises: PyMongoError + """ + mongos_hosts = self.get_mongos_hosts() + with MongosConnection(self.charm.remote_mongos_config(set(mongos_hosts))) as mongo: + return mongo.get_shard_members() + def _is_shard_aware(self) -> bool: """Returns True if shard is in cluster and shard aware.""" if not self.model.get_relation(self.relation_name): diff --git a/src/charm.py b/src/charm.py index a0e96be35..7505f17b8 100755 --- a/src/charm.py +++ b/src/charm.py @@ -554,6 +554,17 @@ def _on_update_status(self, event: UpdateStatusEvent): self.unit.status = WaitingStatus("Waiting for MongoDB to start") return + # Cannot check more advanced MongoDB statuses if the cluster doesn't have passwords synced + # this can occur in two cases: + # 1. password rotation + # 2. race conditions when a new shard is addeded. + if ( + not self.shard.cluster_password_synced() + or not self.config_server.cluster_password_synced() + ): + self.unit.status = WaitingStatus("Waiting to sync passwords across the cluster") + return + # leader should periodically handle configuring the replica set. Incidents such as network # cuts can lead to new IP addresses and therefore will require a reconfigure. Especially # in the case that the leader a change in IP address it will not receive a relation event. diff --git a/terraform/README.md b/terraform/README.md new file mode 100644 index 000000000..fdd0cdd1c --- /dev/null +++ b/terraform/README.md @@ -0,0 +1,58 @@ +# MongoDB Operator Terraform module + +This folder contains a base [Terraform][Terraform] module for the `mongodb` charm. + +The module uses the [Terraform Juju provider][Terraform Juju provider] to model the charm deployment onto any Kubernetes environment managed by [Juju][Juju]. + +The base module is not intended to be deployed in separation (it is possible though), but should rather serve as a building block for higher level modules. + +## Module structure + +- **main.tf** - Defines the Juju application to be deployed. +- **variables.tf** - Allows customization of the deployment such as Juju model name, channel or application name and charm configuration. +- **output.tf** - Responsible for integrating the module with other Terraform modules, primarily by defining potential integration endpoints (charm integrations), but also by exposing the application name. +- **terraform.tf** - Defines the Terraform provider. + +## Using mongodb base module in higher level modules + +If you want to use `mongodb-operator` base module as part of your Terraform module, import it like shown below. + +```text +module "mongodb-operator" { + source = "git::https://github.com/canonical/mongodb-operator.git/terraform" + + model_name = "juju_model_name" + + (Customize configuration variables here if needed) +} +``` + +Please see the link to customize the Grafana configuration variables if needed. + +- [MongoDB configuration options][MongoDB configuration options] + +Create the integrations, for instance: + +```text +resource "juju_integration" "amf-db" { + model = var.model_name + + application { + name = module.amf.app_name + endpoint = module.amf.database_endpoint + } + + application { + name = module.mongodb.app_name + endpoint = module.mongodb.database_endpoint + } +} +``` + +Please check the available [integration pairs][integration pairs]. + +[Terraform]: https://www.terraform.io/ +[Juju]: https://juju.is +[Terraform Juju provider]: https://registry.terraform.io/providers/juju/juju/latest +[MongoDB configuration options]: https://charmhub.io/mongodb/configure?channel=6/edge +[integration pairs]: https://charmhub.io/mongodb/integrations?channel=6/edge diff --git a/terraform/main.tf b/terraform/main.tf new file mode 100644 index 000000000..7d9f417ad --- /dev/null +++ b/terraform/main.tf @@ -0,0 +1,16 @@ +# Copyright 2024 Canonical Ltd. +# See LICENSE file for licensing details. + +resource "juju_application" "mongodb" { + name = var.app_name + model = var.model_name + + charm { + name = "mongodb" + channel = var.channel + base = "ubuntu@22.04" + } + config = var.config + units = 1 + trust = true +} diff --git a/terraform/outputs.tf b/terraform/outputs.tf new file mode 100644 index 000000000..943449446 --- /dev/null +++ b/terraform/outputs.tf @@ -0,0 +1,67 @@ +# Copyright 2024 Canonical Ltd. +# See LICENSE file for licensing details. + +output "app_name" { + description = "Name of the deployed application." + value = juju_application.mongodb.name +} + +# Provided integration endpoints + +output "database_endpoint" { + description = "Name of the endpoint to provide the mongodb_client interface." + value = "database" +} + +output "obsolete_endpoint" { + description = "Name of the endpoint to provide the mongodb interface." + value = "obsolete" +} + +output "cos_agent_endpoint" { + description = "Name of the endpoint to provide the cos_agent interface." + value = "cos-agent" +} + +output "config_server_endpoint" { + description = "Name of the endpoint to provide the shards interface." + value = "config-server" +} + +output "cluster_endpoint" { + description = "Name of the endpoint to provide the config-server interface." + value = "cluster" +} + +output "metrics_endpoint" { + description = "Name of the endpoint to provide the prometheus_scrape interface." + value = "metrics-endpoint" +} + +output "grafana_dashboard_endpoint" { + description = "Name of the endpoint to provide the grafana_dashboard interface." + value = "grafana-dashboard" +} + +output "logging_endpoint" { + description = "Name of the endpoint to provide the loki_push_api relation interface." + value = "logging" +} + + +# Required integration endpoints + +output "certificates_endpoint" { + description = "Name of the endpoint to provide the tls-certificates interface." + value = "certificates" +} + +output "s3_credentials_endpoint" { + description = "Name of the endpoint to provide the s3 interface." + value = "s3-credentials" +} + +output "sharding_endpoint" { + description = "Name of the endpoint to provide the shards interface." + value = "sharding" +} diff --git a/terraform/terraform.tf b/terraform/terraform.tf new file mode 100644 index 000000000..4f60bb46b --- /dev/null +++ b/terraform/terraform.tf @@ -0,0 +1,11 @@ +# Copyright 2024 Canonical Ltd. +# See LICENSE file for licensing details. + +terraform { + required_providers { + juju = { + source = "juju/juju" + version = "~> 0.10.1" + } + } +} diff --git a/terraform/variables.tf b/terraform/variables.tf new file mode 100644 index 000000000..5c611dfcd --- /dev/null +++ b/terraform/variables.tf @@ -0,0 +1,26 @@ +# Copyright 2024 Canonical Ltd. +# See LICENSE file for licensing details. + +variable "model_name" { + description = "Name of Juju model to deploy application to" + type = string + default = "" +} + +variable "app_name" { + description = "Name of the application in the Juju model" + type = string + default = "mongodb" +} + +variable "channel" { + description = "The channel to use when deploying a charm" + type = string + default = "6/beta" +} + +variable "config" { + description = "Additional configuration for the MongoDB. Details about available options can be found at https://charmhub.io/mongodb/configure?channel=6/beta." + type = map(string) + default = {} +} \ No newline at end of file diff --git a/tests/integration/sharding_tests/test_sharding_backups.py b/tests/integration/sharding_tests/test_sharding_backups.py index 9fcbb0af0..49c32bd95 100644 --- a/tests/integration/sharding_tests/test_sharding_backups.py +++ b/tests/integration/sharding_tests/test_sharding_backups.py @@ -134,7 +134,7 @@ async def test_create_and_list_backups_in_cluster(ops_test: OpsTest) -> None: # the action `create-backup` only confirms that the command was sent to the `pbm`. Creating a # backup can take a lot of time so this function returns once the command was successfully # sent to pbm. Therefore we should retry listing the backup several times - for attempt in Retrying(stop=stop_after_delay(20), wait=wait_fixed(3), reraise=True): + for attempt in Retrying(stop=stop_after_delay(TIMEOUT), wait=wait_fixed(3), reraise=True): with attempt: backups = await backup_helpers.count_logical_backups(leader_unit) assert backups == 1 @@ -161,6 +161,12 @@ async def test_shards_cannot_run_backup_actions(ops_test: OpsTest) -> None: @pytest.mark.abort_on_fail async def test_rotate_backup_password(ops_test: OpsTest) -> None: """Tests that sharded cluster can successfully create and list backups.""" + await ops_test.model.wait_for_idle( + apps=[CONFIG_SERVER_APP_NAME, SHARD_ONE_APP_NAME, SHARD_TWO_APP_NAME], + idle_period=20, + timeout=TIMEOUT, + status="active", + ) config_leader_id = await get_leader_id(ops_test, app_name=CONFIG_SERVER_APP_NAME) new_password = "new-password" @@ -185,17 +191,24 @@ async def test_rotate_backup_password(ops_test: OpsTest) -> None: apps=[CONFIG_SERVER_APP_NAME, SHARD_ONE_APP_NAME, SHARD_TWO_APP_NAME], idle_period=20, timeout=TIMEOUT, + status="active", ) + config_svr_backup_password = await get_password( + ops_test, username="backup", app_name=CONFIG_SERVER_APP_NAME + ) + assert ( + config_svr_backup_password == new_password + ), "Application config-srver did not rotate password" shard_backup_password = await get_password( ops_test, username="backup", app_name=SHARD_ONE_APP_NAME ) - assert shard_backup_password != new_password, "Application shard-one did not rotate password" + assert shard_backup_password == new_password, "Application shard-one did not rotate password" shard_backup_password = await get_password( ops_test, username="backup", app_name=SHARD_TWO_APP_NAME ) - assert shard_backup_password != new_password, "Application shard-two did not rotate password" + assert shard_backup_password == new_password, "Application shard-two did not rotate password" # verify backup actions work after password rotation leader_unit = await backup_helpers.get_leader_unit(