From fb5397b4c1eb5492ad931dad7e14087016256cf9 Mon Sep 17 00:00:00 2001 From: Adam Dyess Date: Thu, 21 Nov 2024 19:18:01 -0600 Subject: [PATCH 1/7] Rename charm configurations to include new prefixes (#179) * Rename charm configurations to include new prefixes * use accepted node-base library * Improve the docs for service-cidr and pod-cidr --- charms/worker/charmcraft.yaml | 2 +- charms/worker/k8s/charmcraft.yaml | 63 ++++++++++++++------ charms/worker/k8s/requirements.txt | 2 +- charms/worker/k8s/src/charm.py | 19 +++--- charms/worker/k8s/src/kube_control.py | 4 +- charms/worker/k8s/tests/unit/test_base.py | 5 +- tests/integration/data/test-bundle-ceph.yaml | 2 + tests/integration/data/test-bundle-etcd.yaml | 3 +- tests/integration/data/test-bundle.yaml | 2 + tests/integration/test_k8s.py | 2 +- 10 files changed, 69 insertions(+), 35 deletions(-) diff --git a/charms/worker/charmcraft.yaml b/charms/worker/charmcraft.yaml index 6f158ad0..2d7602ed 100644 --- a/charms/worker/charmcraft.yaml +++ b/charms/worker/charmcraft.yaml @@ -60,7 +60,7 @@ bases: config: options: - labels: + node-labels: default: "" type: string description: | diff --git a/charms/worker/k8s/charmcraft.yaml b/charms/worker/k8s/charmcraft.yaml index 8f062a00..72b2b452 100644 --- a/charms/worker/k8s/charmcraft.yaml +++ b/charms/worker/k8s/charmcraft.yaml @@ -81,7 +81,7 @@ config: Example: e.g.: key1=value1 key2=value2 - containerd_custom_registries: + containerd-custom-registries: type: string default: "[]" description: | @@ -128,39 +128,63 @@ config: "key_file": "'"$(base64 -w 0 < ~/my.custom.key.pem)"'", }]' - datastore: + bootstrap-datastore: default: dqlite type: string description: | The datastore to use in Canonical Kubernetes. This cannot be changed after deployment. Allowed values are "dqlite" and "etcd". If "etcd" is chosen, the charm should be integrated with the etcd charm. - labels: - default: "" - type: string - description: | - Labels can be used to organize and to select subsets of nodes in the - cluster. Declare node labels in key=value format, separated by spaces. - register-with-taints: + bootstrap-node-taints: type: string default: "" description: | Space-separated list of taints to apply to this node at registration time. - This config is only used at deploy time when Kubelet first registers the + This config is only used at bootstrap time when Kubelet first registers the node with Kubernetes. To change node taints after deploy time, use kubectl instead. For more information, see the upstream Kubernetes documentation about taints: https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/ - service-cidr: + bootstrap-pod-cidr: + type: string + default: "10.1.0.0/16" + description: | + Comma-separated CIDR blocks for IP addresses that can be assigned + to pods within the cluster. Can contain at most 2 blocks, one for IPv4 + and one for IPv6. + + After deployment it is not possible to change the size of + the IP range. + + Examples: + - "192.0.2.0/24" + - "2001:db8::/32" + - "192.0.2.0/24,2001:db8::/32" + - "2001:db8::/32,192.0.2.0/24" + bootstrap-service-cidr: type: string default: 10.152.183.0/24 description: | - CIDR to use for Kubernetes services. After deployment it is - only possible to increase the size of the IP range. It is not possible to - change or shrink the address range after deployment. + Comma-separated CIDR blocks for IP addresses that can be assigned + to services within the cluster. Can contain at most 2 blocks, one for IPv4 + and one for IPv6. 
+ + After deployment it is not possible to change the size of + the IP range. + + Examples: + - "192.0.2.0/24" + - "2001:db8::/32" + - "192.0.2.0/24,2001:db8::/32" + - "2001:db8::/32,192.0.2.0/24" + gateway-enabled: + type: boolean + default: false + description: | + Enable/Disable the gateway feature on the cluster. local-storage-enabled: type: boolean default: true @@ -184,16 +208,17 @@ config: "Retain". If set to "Delete", the storage will be deleted when the PersistentVolumeClaim is deleted. If set to "Retain", the storage will be retained when the PersistentVolumeClaim is deleted. - gateway-enabled: - type: boolean - default: false - description: | - Enable/Disable the gateway feature on the cluster. network-enabled: type: boolean default: true description: | Enables or disables the network feature. + node-labels: + default: "" + type: string + description: | + Labels can be used to organize and to select subsets of nodes in the + cluster. Declare node labels in key=value format, separated by spaces. resources: snap-installation: diff --git a/charms/worker/k8s/requirements.txt b/charms/worker/k8s/requirements.txt index 9e532201..3c639667 100644 --- a/charms/worker/k8s/requirements.txt +++ b/charms/worker/k8s/requirements.txt @@ -1,6 +1,6 @@ charm-lib-contextual-status @ git+https://github.com/charmed-kubernetes/charm-lib-contextual-status@255dd4a23defc16dcdac832306e5f460a0f1200c charm-lib-interface-external-cloud-provider @ git+https://github.com/charmed-kubernetes/charm-lib-interface-external-cloud-provider@e1c5fc69e98100a7d43c0ad5a7969bba1ecbcd40 -charm-lib-node-base @ git+https://github.com/charmed-kubernetes/layer-kubernetes-node-base@9b212854e768f13c26cc907bed51444e97e51b50#subdirectory=ops +charm-lib-node-base @ git+https://github.com/charmed-kubernetes/layer-kubernetes-node-base@a14d685237302711113ac651920476437b3b9785#subdirectory=ops charm-lib-reconciler @ git+https://github.com/charmed-kubernetes/charm-lib-reconciler@f818cc30d1a22be43ffdfecf7fbd9c3fd2967502 ops-interface-kube-control @ git+https://github.com/charmed-kubernetes/interface-kube-control.git@main#subdirectory=ops ops.interface_aws @ git+https://github.com/charmed-kubernetes/interface-aws-integration@main#subdirectory=ops diff --git a/charms/worker/k8s/src/charm.py b/charms/worker/k8s/src/charm.py index 68dd36e2..95fa9950 100755 --- a/charms/worker/k8s/src/charm.py +++ b/charms/worker/k8s/src/charm.py @@ -150,7 +150,11 @@ def __init__(self, *args): self.distributor = TokenDistributor(self, self.get_node_name(), self.api_manager) self.collector = TokenCollector(self, self.get_node_name()) self.labeller = LabelMaker( - self, kubeconfig_path=self._internal_kubeconfig, kubectl=KUBECTL_PATH + self, + kubeconfig_path=self._internal_kubeconfig, + kubectl=KUBECTL_PATH, + user_label_key="node-labels", + timeout=15, ) self._stored.set_default(is_dying=False, cluster_name=str()) @@ -324,8 +328,9 @@ def _bootstrap_k8s_snap(self): bootstrap_config = BootstrapConfig.construct() self._configure_datastore(bootstrap_config) bootstrap_config.cluster_config = self._assemble_cluster_config() - bootstrap_config.service_cidr = str(self.config["service-cidr"]) - bootstrap_config.control_plane_taints = str(self.config["register-with-taints"]).split() + bootstrap_config.service_cidr = str(self.config["bootstrap-service-cidr"]) + bootstrap_config.pod_cidr = str(self.config["bootstrap-pod-cidr"]) + bootstrap_config.control_plane_taints = str(self.config["bootstrap-node-taints"]).split() bootstrap_config.extra_sans = [_get_public_address()] 
bootstrap_config.extra_node_kube_controller_manager_args = { "--cluster-name": self._generate_unique_cluster_name() @@ -354,7 +359,7 @@ def _config_containerd_registries(self): registries, config = [], "" containerd_relation = self.model.get_relation("containerd") if self.is_control_plane: - config = str(self.config["containerd_custom_registries"]) + config = str(self.config["containerd-custom-registries"]) registries = containerd.parse_registries(config) else: registries = containerd.recover(containerd_relation) @@ -416,9 +421,7 @@ def _assemble_cluster_config(self) -> UserFacingClusterConfig: # https://github.com/canonical/k8s-operator/pull/169/files#r1847378214 ) - gateway = GatewayConfig( - enabled=self.config.get("gateway-enabled"), - ) + gateway = GatewayConfig(enabled=self.config.get("gateway-enabled")) network = NetworkConfig( enabled=self.config.get("network-enabled"), @@ -444,7 +447,7 @@ def _configure_datastore(self, config: Union[BootstrapConfig, UpdateClusterConfi The configuration object for the Kubernetes cluster. This object will be modified in-place to include etcd's configuration details. """ - datastore = self.config.get("datastore") + datastore = self.config.get("bootstrap-datastore") if datastore not in SUPPORTED_DATASTORES: log.error( diff --git a/charms/worker/k8s/src/kube_control.py b/charms/worker/k8s/src/kube_control.py index facb5796..02c8ea2f 100644 --- a/charms/worker/k8s/src/kube_control.py +++ b/charms/worker/k8s/src/kube_control.py @@ -25,8 +25,8 @@ def configure(charm: K8sCharmProtocol): status.add(ops.MaintenanceStatus("Configuring Kube Control")) ca_cert, endpoints = "", [f"https://{binding.network.bind_address}:6443"] - labels = str(charm.model.config["labels"]) - taints = str(charm.model.config["register-with-taints"]) + labels = str(charm.model.config["node-labels"]) + taints = str(charm.model.config["bootstrap-node-taints"]) if charm._internal_kubeconfig.exists(): kubeconfig = yaml.safe_load(charm._internal_kubeconfig.read_text()) cluster = kubeconfig["clusters"][0]["cluster"] diff --git a/charms/worker/k8s/tests/unit/test_base.py b/charms/worker/k8s/tests/unit/test_base.py index dc1038ce..a69a6f45 100644 --- a/charms/worker/k8s/tests/unit/test_base.py +++ b/charms/worker/k8s/tests/unit/test_base.py @@ -144,7 +144,7 @@ def test_configure_datastore_bootstrap_config_etcd(harness): harness.disable_hooks() bs_config = BootstrapConfig() - harness.update_config({"datastore": "etcd"}) + harness.update_config({"bootstrap-datastore": "etcd"}) harness.add_relation("etcd", "etcd") with mock.patch.object(harness.charm, "etcd") as mock_etcd: mock_etcd.is_ready = True @@ -182,7 +182,7 @@ def test_configure_datastore_runtime_config_etcd(harness): pytest.skip("Not applicable on workers") harness.disable_hooks() - harness.update_config({"datastore": "etcd"}) + harness.update_config({"bootstrap-datastore": "etcd"}) harness.add_relation("etcd", "etcd") with mock.patch.object(harness.charm, "etcd") as mock_etcd: mock_etcd.is_ready = True @@ -190,6 +190,7 @@ def test_configure_datastore_runtime_config_etcd(harness): mock_etcd.get_connection_string.return_value = "foo:1234,bar:1234" uccr_config = UpdateClusterConfigRequest() harness.charm._configure_datastore(uccr_config) + assert uccr_config.datastore assert uccr_config.datastore.ca_crt == "" assert uccr_config.datastore.client_crt == "" assert uccr_config.datastore.client_key == "" diff --git a/tests/integration/data/test-bundle-ceph.yaml b/tests/integration/data/test-bundle-ceph.yaml index 8b803e9e..37d4c912 100644 --- 
a/tests/integration/data/test-bundle-ceph.yaml +++ b/tests/integration/data/test-bundle-ceph.yaml @@ -11,6 +11,8 @@ applications: channel: latest/edge constraints: cores=2 mem=8G root-disk=16G num_units: 1 + options: + bootstrap-node-taints: "node-role.kubernetes.io/control-plane=:NoSchedule" k8s-worker: charm: k8s-worker channel: latest/edge diff --git a/tests/integration/data/test-bundle-etcd.yaml b/tests/integration/data/test-bundle-etcd.yaml index 7446af4f..662c984f 100644 --- a/tests/integration/data/test-bundle-etcd.yaml +++ b/tests/integration/data/test-bundle-etcd.yaml @@ -22,7 +22,8 @@ applications: num_units: 1 constraints: cores=2 mem=8G root-disk=16G options: - datastore: etcd + bootstrap-datastore: etcd + bootstrap-node-taints: "node-role.kubernetes.io/control-plane=:NoSchedule" k8s-worker: charm: k8s-worker channel: latest/edge diff --git a/tests/integration/data/test-bundle.yaml b/tests/integration/data/test-bundle.yaml index dcc5710f..76448c31 100644 --- a/tests/integration/data/test-bundle.yaml +++ b/tests/integration/data/test-bundle.yaml @@ -12,6 +12,8 @@ applications: num_units: 3 constraints: cores=2 mem=8G root-disk=16G expose: true + options: + bootstrap-node-taints: "node-role.kubernetes.io/control-plane=:NoSchedule" k8s-worker: charm: k8s-worker channel: latest/edge diff --git a/tests/integration/test_k8s.py b/tests/integration/test_k8s.py index 258c039d..2e6ee6ea 100644 --- a/tests/integration/test_k8s.py +++ b/tests/integration/test_k8s.py @@ -54,7 +54,7 @@ async def test_nodes_labelled(request, kubernetes_cluster: model.Model): testname: str = request.node.name k8s: application.Application = kubernetes_cluster.applications["k8s"] worker: application.Application = kubernetes_cluster.applications["k8s-worker"] - label_config = {"labels": f"{testname}="} + label_config = {"node-labels": f"{testname}="} await asyncio.gather(k8s.set_config(label_config), worker.set_config(label_config)) await kubernetes_cluster.wait_for_idle(status="active", timeout=10 * 60) From aaf658a54ed7272ba0b4fa32db361a47736d2f78 Mon Sep 17 00:00:00 2001 From: Homayoon Alimohammadi Date: Fri, 22 Nov 2024 15:34:13 +0400 Subject: [PATCH 2/7] Replace AssertionError with ReconcilerError (#173) --- charms/worker/k8s/src/charm.py | 26 +++++++++------- charms/worker/k8s/src/token_distributor.py | 35 +++++++++++++++++----- 2 files changed, 43 insertions(+), 18 deletions(-) diff --git a/charms/worker/k8s/src/charm.py b/charms/worker/k8s/src/charm.py index 95fa9950..39b03f25 100755 --- a/charms/worker/k8s/src/charm.py +++ b/charms/worker/k8s/src/charm.py @@ -191,7 +191,6 @@ def _k8s_info(self, event: ops.EventBase): @status.on_error( ops.WaitingStatus("Installing COS requirements"), subprocess.CalledProcessError, - AssertionError, ) def _apply_cos_requirements(self): """Apply COS requirements for integration. @@ -315,7 +314,7 @@ def _check_k8sd_ready(self): @on_error( ops.WaitingStatus("Waiting to bootstrap k8s snap"), - AssertionError, + ReconcilerError, InvalidResponseError, K8sdConnectionError, ) @@ -387,7 +386,7 @@ def _get_valid_annotations(self) -> Optional[dict]: dict: The parsed annotations if valid, otherwise None. Raises: - AssertionError: If any annotation is invalid. + ReconcilerError: If any annotation is invalid. 
""" raw_annotations = self.config.get("annotations") if not raw_annotations: @@ -398,9 +397,10 @@ def _get_valid_annotations(self) -> Optional[dict]: annotations = {} try: for key, value in [pair.split("=", 1) for pair in raw_annotations.split()]: - assert key and value, "Invalid Annotation" # nosec + if not key or not value: + raise ReconcilerError("Invalid Annotation") annotations[key] = value - except AssertionError: + except ReconcilerError: log.exception("Invalid annotations: %s", raw_annotations) status.add(ops.BlockedStatus("Invalid Annotations")) raise @@ -456,14 +456,18 @@ def _configure_datastore(self, config: Union[BootstrapConfig, UpdateClusterConfi ", ".join(SUPPORTED_DATASTORES), ) status.add(ops.BlockedStatus(f"Invalid datastore: {datastore}")) - assert datastore in SUPPORTED_DATASTORES # nosec + if datastore not in SUPPORTED_DATASTORES: + raise ReconcilerError(f"Invalid datastore: {datastore}") if datastore == "etcd": log.info("Using etcd as external datastore") etcd_relation = self.model.get_relation("etcd") - assert etcd_relation, "Missing etcd relation" # nosec - assert self.etcd.is_ready, "etcd is not ready" # nosec + if not etcd_relation: + raise ReconcilerError("Missing etcd relation") + + if not self.etcd.is_ready: + raise ReconcilerError("etcd is not ready") etcd_config = self.etcd.get_client_credentials() if isinstance(config, BootstrapConfig): @@ -580,7 +584,7 @@ def _enable_functionalities(self): @on_error( WaitingStatus("Ensure that the cluster configuration is up-to-date"), - AssertionError, + ReconcilerError, InvalidResponseError, K8sdConnectionError, ) @@ -618,7 +622,7 @@ def _get_scrape_jobs(self): return self.cos.get_metrics_endpoints( self.get_node_name(), token, self.is_control_plane ) - except AssertionError: + except ReconcilerError: log.exception("Failed to get COS token.") return [] @@ -690,7 +694,7 @@ def _get_proxy_env(self) -> Dict[str, str]: @on_error( WaitingStatus("Waiting for Cluster token"), - AssertionError, + ReconcilerError, InvalidResponseError, K8sdConnectionError, ) diff --git a/charms/worker/k8s/src/token_distributor.py b/charms/worker/k8s/src/token_distributor.py index c3d65c41..d51d91bb 100644 --- a/charms/worker/k8s/src/token_distributor.py +++ b/charms/worker/k8s/src/token_distributor.py @@ -11,6 +11,7 @@ import charms.contextual_status as status import ops +from charms.contextual_status import ReconcilerError from charms.k8s.v0.k8sd_api_manager import ( ErrorCodes, InvalidResponseError, @@ -209,6 +210,9 @@ def cluster_name(self, relation: ops.Relation, local: bool) -> str: Returns: the recovered cluster name from existing relations + + Raises: + ReconcilerError: If fails to find 1 relation-name:cluster-name. """ cluster_name: Optional[str] = "" if not local: @@ -218,7 +222,8 @@ def cluster_name(self, relation: ops.Relation, local: bool) -> str: if value := relation.data[unit].get("cluster-name"): values |= {value} if values: - assert len(values) == 1, f"Failed to find 1 {relation.name}:cluster-name" # nosec + if len(values) != 1: + raise ReconcilerError(f"Failed to find 1 {relation.name}:cluster-name") (cluster_name,) = values elif not (cluster_name := relation.data[self.charm.unit].get("joined")): # joined_cluster_name @@ -235,6 +240,12 @@ def recover_token(self, relation: ops.Relation) -> Generator[str, None, None]: Yields: str: extracted token content + + Raises: + ReconcilerError: + - If fails to find 1 relation-name:secret-id. + - If relation-name:secret-key is not valid. + - If relation-name:token is not valid. 
""" self.request(relation) @@ -246,14 +257,17 @@ def recover_token(self, relation: ops.Relation) -> Generator[str, None, None]: if (secret_id := relation.data[unit].get(secret_key)) } - assert len(secret_ids) == 1, f"Failed to find 1 {relation.name}:{secret_key}" # nosec + if len(secret_ids) != 1: + raise ReconcilerError(f"Failed to find 1 {relation.name}:{secret_key}") (secret_id,) = secret_ids - assert secret_id, f"{relation.name}:{secret_key} is not valid" # nosec + if not secret_id: + raise ReconcilerError(f"{relation.name}:{secret_key} is not valid") secret = self.charm.model.get_secret(id=secret_id) # Get the content from the secret content = secret.get_content(refresh=True) - assert content["token"], f"{relation.name}:token not valid" # nosec + if not content.get("token"): + raise ReconcilerError(f"{relation.name}:token not valid") yield content["token"] # signal that the relation is joined, the token is used @@ -343,7 +357,7 @@ def update_node(self, relation: ops.Relation, unit: ops.Unit, state: str): """ relation.data[self.charm.app][unit.name] = state - def allocate_tokens( + def allocate_tokens( # noqa: C901 self, relation: ops.Relation, token_strategy: TokenStrategy, @@ -356,16 +370,23 @@ def allocate_tokens( token_strategy (TokenStrategy): The strategy of token creation. token_type (ClusterTokenType): The type of cluster token. Defaults to ClusterTokenType.NONE. + + Raises: + ReconcilerError: + - If token_strategy is valid. + - If remote application doesn't exist on relation. """ units = relation.units if self.charm.app == relation.app: # include self in peer relations units |= {self.charm.unit} - assert relation.app, f"Remote application doesn't exist on {relation.name}" # nosec + if not relation.app: + raise ReconcilerError(f"Remote application doesn't exist on {relation.name}") # Select the appropriate token creation strategy tokenizer = self.token_strategies.get(token_strategy) - assert tokenizer, f"Invalid token_strategy: {token_strategy}" # nosec + if not tokenizer: + raise ReconcilerError(f"Invalid token_strategy: {token_strategy}") log.info("Allocating %s %s tokens", token_type.name.title(), token_strategy.name.title()) status.add( From 109adea9128d75d9b3ca1b1595b3dc64aa339142 Mon Sep 17 00:00:00 2001 From: eaudetcobello Date: Fri, 22 Nov 2024 12:11:21 -0500 Subject: [PATCH 3/7] Add Load Balancer configuration through charm config (#170) * Add cluster config through charm config * initial commit * Filled out the charmcraft with the features --------- Co-authored-by: Benjamin Schimke Co-authored-by: Adam Dyess --- charms/worker/k8s/charmcraft.yaml | 48 +++++++++++++++++++ .../k8s/lib/charms/k8s/v0/k8sd_api_manager.py | 8 ++-- charms/worker/k8s/src/charm.py | 14 ++++++ 3 files changed, 66 insertions(+), 4 deletions(-) diff --git a/charms/worker/k8s/charmcraft.yaml b/charms/worker/k8s/charmcraft.yaml index 72b2b452..4746bec8 100644 --- a/charms/worker/k8s/charmcraft.yaml +++ b/charms/worker/k8s/charmcraft.yaml @@ -185,6 +185,54 @@ config: default: false description: | Enable/Disable the gateway feature on the cluster. + load-balancer-enabled: + type: boolean + default: false + description: | + Enable/Disable the load balancer feature on the cluster. + load-balancer-cidrs: + type: string + default: "" + description: | + Space-separated list of CIDRs to use for the load balancer. This is + only used if load-balancer-enabled is set to true. + load-balancer-l2-mode: + type: boolean + default: false + description: | + Enable/Disable L2 mode for the load balancer. 
This is only used if + load-balancer-enabled is set to true. + load-balancer-l2-interfaces: + type: string + default: "" + description: | + Space-separated list of interfaces to use for the load balancer. This + is only used if load-balancer-l2-mode is set to true. if unset, all + interfaces will be used. + load-balancer-bgp-mode: + type: boolean + default: false + description: | + Enable/Disable BGP mode for the load balancer. This is only used if + load-balancer-enabled is set to true. + load-balancer-bgp-local-asn: + type: int + default: 64512 + description: | + Local ASN for the load balancer. This is only used if load-balancer-bgp-mode + is set to true. + load-balancer-bgp-peer-address: + type: string + default: "" + description: | + Address of the BGP peer for the load balancer. This is only used if + load-balancer-bgp-mode is set to true. + load-balancer-bgp-peer-port: + type: int + default: 179 + description: | + Port of the BGP peer for the load balancer. This is only used if + load-balancer-bgp-mode is set to true. local-storage-enabled: type: boolean default: true diff --git a/charms/worker/k8s/lib/charms/k8s/v0/k8sd_api_manager.py b/charms/worker/k8s/lib/charms/k8s/v0/k8sd_api_manager.py index 6b0cf534..12234de1 100644 --- a/charms/worker/k8s/lib/charms/k8s/v0/k8sd_api_manager.py +++ b/charms/worker/k8s/lib/charms/k8s/v0/k8sd_api_manager.py @@ -230,9 +230,9 @@ class LoadBalancerConfig(BaseModel, allow_population_by_field_name=True): Attributes: enabled: Optional flag which represents the status of LoadBalancer. cidrs: List of CIDR blocks for the load balancer. - l2_enabled: Optional flag to enable or disable layer 2 functionality. + l2_mode: Optional flag to enable or disable layer 2 mode. l2_interfaces: List of layer 2 interfaces for the load balancer. - bgp_enabled: Optional flag to enable or disable BGP. + bgp_mode: Optional flag to enable or disable BGP. bgp_local_asn: The local ASN for BGP configuration. bgp_peer_address: The peer address for BGP configuration. bgp_peer_asn: The peer ASN for BGP configuration. 
@@ -241,9 +241,9 @@ class LoadBalancerConfig(BaseModel, allow_population_by_field_name=True): enabled: Optional[bool] = Field(default=None) cidrs: Optional[List[str]] = Field(default=None) - l2_enabled: Optional[bool] = Field(default=None, alias="l2-enabled") + l2_mode: Optional[bool] = Field(default=None, alias="l2-mode") l2_interfaces: Optional[List[str]] = Field(default=None, alias="l2-interfaces") - bgp_enabled: Optional[bool] = Field(default=None, alias="bgp-enabled") + bgp_mode: Optional[bool] = Field(default=None, alias="bgp-mode") bgp_local_asn: Optional[int] = Field(default=None, alias="bgp-local-asn") bgp_peer_address: Optional[str] = Field(default=None, alias="bgp-peer-address") bgp_peer_asn: Optional[int] = Field(default=None, alias="bgp-peer-asn") diff --git a/charms/worker/k8s/src/charm.py b/charms/worker/k8s/src/charm.py index 39b03f25..6936b219 100755 --- a/charms/worker/k8s/src/charm.py +++ b/charms/worker/k8s/src/charm.py @@ -45,6 +45,7 @@ JoinClusterRequest, K8sdAPIManager, K8sdConnectionError, + LoadBalancerConfig, LocalStorageConfig, NetworkConfig, UnixSocketConnectionFactory, @@ -427,6 +428,18 @@ def _assemble_cluster_config(self) -> UserFacingClusterConfig: enabled=self.config.get("network-enabled"), ) + load_balancer = LoadBalancerConfig( + enabled=self.config.get("load-balancer-enabled"), + cidrs=str(self.config.get("load-balancer-cidrs")).split(), + l2_mode=self.config.get("load-balancer-l2-mode"), + l2_interfaces=str(self.config.get("load-balancer-l2-interfaces")).split(), + bgp_mode=self.config.get("load-balancer-bgp-mode"), + bgp_local_asn=self.config.get("load-balancer-bgp-local-asn"), + bgp_peer_address=self.config.get("load-balancer-bgp-peer-address"), + bgp_peer_asn=self.config.get("load-balancer-bgp-peer-asn"), + bgp_peer_port=self.config.get("load-balancer-bgp-peer-port"), + ) + cloud_provider = None if self.xcp.has_xcp: cloud_provider = "external" @@ -437,6 +450,7 @@ def _assemble_cluster_config(self) -> UserFacingClusterConfig: network=network, annotations=self._get_valid_annotations(), cloud_provider=cloud_provider, + load_balancer=load_balancer, ) def _configure_datastore(self, config: Union[BootstrapConfig, UpdateClusterConfigRequest]): From 9909a91458402ed863c2d19d6e59b123422b9a93 Mon Sep 17 00:00:00 2001 From: Adam Dyess Date: Fri, 22 Nov 2024 12:55:38 -0600 Subject: [PATCH 4/7] Add charm config for metrics-server and dns-config (#183) --- charms/worker/k8s/charmcraft.yaml | 122 ++++++++++++++-------- charms/worker/k8s/src/charm.py | 43 ++++---- charms/worker/k8s/tests/unit/test_base.py | 1 - 3 files changed, 95 insertions(+), 71 deletions(-) diff --git a/charms/worker/k8s/charmcraft.yaml b/charms/worker/k8s/charmcraft.yaml index 4746bec8..09ec05cf 100644 --- a/charms/worker/k8s/charmcraft.yaml +++ b/charms/worker/k8s/charmcraft.yaml @@ -81,6 +81,58 @@ config: Example: e.g.: key1=value1 key2=value2 + bootstrap-datastore: + default: dqlite + type: string + description: | + The datastore to use in Canonical Kubernetes. This cannot be changed + after deployment. Allowed values are "dqlite" and "etcd". If "etcd" is + chosen, the charm should be integrated with the etcd charm. + bootstrap-node-taints: + type: string + default: "" + description: | + Space-separated list of taints to apply to this node at registration time. + + This config is only used at bootstrap time when Kubelet first registers the + node with Kubernetes. To change node taints after deploy time, use kubectl + instead. 
+ + For more information, see the upstream Kubernetes documentation about + taints: + https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/ + bootstrap-pod-cidr: + type: string + default: "10.1.0.0/16" + description: | + Comma-separated CIDR blocks for IP addresses that can be assigned + to pods within the cluster. Can contain at most 2 blocks, one for IPv4 + and one for IPv6. + + After deployment it is not possible to change the size of + the IP range. + + Examples: + - "192.0.2.0/24" + - "2001:db8::/32" + - "192.0.2.0/24,2001:db8::/32" + - "2001:db8::/32,192.0.2.0/24" + bootstrap-service-cidr: + type: string + default: 10.152.183.0/24 + description: | + Comma-separated CIDR blocks for IP addresses that can be assigned + to services within the cluster. Can contain at most 2 blocks, one for IPv4 + and one for IPv6. + + After deployment it is not possible to change the size of + the IP range. + + Examples: + - "192.0.2.0/24" + - "2001:db8::/32" + - "192.0.2.0/24,2001:db8::/32" + - "2001:db8::/32,192.0.2.0/24" containerd-custom-registries: type: string default: "[]" @@ -127,59 +179,32 @@ config: "cert_file": "'"$(base64 -w 0 < ~/my.custom.cert.pem)"'", "key_file": "'"$(base64 -w 0 < ~/my.custom.key.pem)"'", }]' - - bootstrap-datastore: - default: dqlite - type: string + dns-enabled: + type: boolean + default: true description: | - The datastore to use in Canonical Kubernetes. This cannot be changed - after deployment. Allowed values are "dqlite" and "etcd". If "etcd" is - chosen, the charm should be integrated with the etcd charm. - bootstrap-node-taints: + Enable/Disable the DNS feature on the cluster. + dns-cluster-domain: type: string - default: "" + default: "cluster.local" description: | - Space-separated list of taints to apply to this node at registration time. - - This config is only used at bootstrap time when Kubelet first registers the - node with Kubernetes. To change node taints after deploy time, use kubectl - instead. - - For more information, see the upstream Kubernetes documentation about - taints: - https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/ - bootstrap-pod-cidr: + Sets the local domain of the cluster + dns-service-ip: type: string - default: "10.1.0.0/16" + default: "" description: | - Comma-separated CIDR blocks for IP addresses that can be assigned - to pods within the cluster. Can contain at most 2 blocks, one for IPv4 - and one for IPv6. - - After deployment it is not possible to change the size of - the IP range. - - Examples: - - "192.0.2.0/24" - - "2001:db8::/32" - - "192.0.2.0/24,2001:db8::/32" - - "2001:db8::/32,192.0.2.0/24" - bootstrap-service-cidr: + Sets the IP address of the dns service. If omitted defaults to the IP address + of the Kubernetes service created by the feature. + + Can be used to point to an external dns server when feature is disabled. + dns-upstream-nameservers: type: string - default: 10.152.183.0/24 + default: "" description: | - Comma-separated CIDR blocks for IP addresses that can be assigned - to services within the cluster. Can contain at most 2 blocks, one for IPv4 - and one for IPv6. - - After deployment it is not possible to change the size of - the IP range. - - Examples: - - "192.0.2.0/24" - - "2001:db8::/32" - - "192.0.2.0/24,2001:db8::/32" - - "2001:db8::/32,192.0.2.0/24" + Space-separated list of upstream nameservers used to forward queries for out-of-cluster + endpoints. + + If omitted defaults to `/etc/resolv.conf` and uses the nameservers on each node. 
gateway-enabled: type: boolean default: false @@ -256,6 +281,11 @@ config: "Retain". If set to "Delete", the storage will be deleted when the PersistentVolumeClaim is deleted. If set to "Retain", the storage will be retained when the PersistentVolumeClaim is deleted. + metrics-server-enabled: + type: boolean + default: true + description: | + Enable/Disable the metrics-server feature on the cluster. network-enabled: type: boolean default: true diff --git a/charms/worker/k8s/src/charm.py b/charms/worker/k8s/src/charm.py index 6936b219..e57809bc 100755 --- a/charms/worker/k8s/src/charm.py +++ b/charms/worker/k8s/src/charm.py @@ -47,6 +47,7 @@ K8sdConnectionError, LoadBalancerConfig, LocalStorageConfig, + MetricsServerConfig, NetworkConfig, UnixSocketConnectionFactory, UpdateClusterConfigRequest, @@ -422,12 +423,24 @@ def _assemble_cluster_config(self) -> UserFacingClusterConfig: # https://github.com/canonical/k8s-operator/pull/169/files#r1847378214 ) + dns_config = DNSConfig( + enabled=self.config.get("dns-enabled"), + ) + if cfg := self.config.get("dns-cluster-domain"): + dns_config.cluster_domain = str(cfg) + if cfg := self.config.get("dns-service-ip"): + dns_config.service_ip = str(cfg) + if cfg := self.config.get("dns-upstream-nameservers"): + dns_config.upstream_nameservers = str(cfg).split() + gateway = GatewayConfig(enabled=self.config.get("gateway-enabled")) network = NetworkConfig( enabled=self.config.get("network-enabled"), ) + metrics_server = MetricsServerConfig(enabled=self.config.get("metrics-server-enabled")) + load_balancer = LoadBalancerConfig( enabled=self.config.get("load-balancer-enabled"), cidrs=str(self.config.get("load-balancer-cidrs")).split(), @@ -445,12 +458,14 @@ def _assemble_cluster_config(self) -> UserFacingClusterConfig: cloud_provider = "external" return UserFacingClusterConfig( - local_storage=local_storage, - gateway=gateway, - network=network, annotations=self._get_valid_annotations(), cloud_provider=cloud_provider, + dns_config=dns_config, + gateway=gateway, + local_storage=local_storage, load_balancer=load_balancer, + metrics_server=metrics_server, + network=network, ) def _configure_datastore(self, config: Union[BootstrapConfig, UpdateClusterConfigRequest]): @@ -577,25 +592,6 @@ def _create_cos_tokens(self): token_type=ClusterTokenType.WORKER, ) - @on_error( - WaitingStatus("Waiting to enable features"), - InvalidResponseError, - K8sdConnectionError, - ) - def _enable_functionalities(self): - """Enable necessary components for the Kubernetes cluster.""" - status.add(ops.MaintenanceStatus("Updating K8s features")) - log.info("Enabling K8s features") - dns_config = DNSConfig(enabled=True) - network_config = NetworkConfig(enabled=True) - local_storage_config = LocalStorageConfig(enabled=True) - user_cluster_config = UserFacingClusterConfig( - dns=dns_config, network=network_config, local_storage=local_storage_config - ) - update_request = UpdateClusterConfigRequest(config=user_cluster_config) - - self.api_manager.update_cluster_config(update_request) - @on_error( WaitingStatus("Ensure that the cluster configuration is up-to-date"), ReconcilerError, @@ -794,12 +790,11 @@ def _reconcile(self, event: ops.EventBase): if self.lead_control_plane: self._k8s_info(event) self._bootstrap_k8s_snap() - self._enable_functionalities() + self._ensure_cluster_config() self._create_cluster_tokens() self._create_cos_tokens() self._apply_cos_requirements() self._revoke_cluster_tokens(event) - self._ensure_cluster_config() self._announce_kubernetes_version() 
self._join_cluster(event) self._config_containerd_registries() diff --git a/charms/worker/k8s/tests/unit/test_base.py b/charms/worker/k8s/tests/unit/test_base.py index a69a6f45..6a3f419a 100644 --- a/charms/worker/k8s/tests/unit/test_base.py +++ b/charms/worker/k8s/tests/unit/test_base.py @@ -58,7 +58,6 @@ def mock_reconciler_handlers(harness): if harness.charm.is_control_plane: handler_names |= { "_bootstrap_k8s_snap", - "_enable_functionalities", "_create_cluster_tokens", "_create_cos_tokens", "_apply_cos_requirements", From 25370eb4bc268e11af0991dec1495c91e436718c Mon Sep 17 00:00:00 2001 From: Homayoon Alimohammadi Date: Sat, 23 Nov 2024 01:30:18 +0400 Subject: [PATCH 5/7] Add Ingress feature configuration (#176) * Add Ingress feature config options * Remove defaultTLS option --- charms/worker/k8s/charmcraft.yaml | 10 +++++++++ charms/worker/k8s/src/charm.py | 7 ++++++ .../k8s/tests/unit/test_config_options.py | 22 +++++++++++++++++++ 3 files changed, 39 insertions(+) diff --git a/charms/worker/k8s/charmcraft.yaml b/charms/worker/k8s/charmcraft.yaml index 09ec05cf..0f685cc3 100644 --- a/charms/worker/k8s/charmcraft.yaml +++ b/charms/worker/k8s/charmcraft.yaml @@ -291,6 +291,16 @@ config: default: true description: | Enables or disables the network feature. + ingress-enabled: + type: boolean + default: false + description: | + Determines if the ingress feature should be enabled. + ingress-enable-proxy-protocol: + type: boolean + default: false + description: | + Determines if the proxy protocol should be enabled for ingresses. node-labels: default: "" type: string diff --git a/charms/worker/k8s/src/charm.py b/charms/worker/k8s/src/charm.py index e57809bc..edaaa0cf 100755 --- a/charms/worker/k8s/src/charm.py +++ b/charms/worker/k8s/src/charm.py @@ -41,6 +41,7 @@ CreateClusterRequest, DNSConfig, GatewayConfig, + IngressConfig, InvalidResponseError, JoinClusterRequest, K8sdAPIManager, @@ -439,6 +440,11 @@ def _assemble_cluster_config(self) -> UserFacingClusterConfig: enabled=self.config.get("network-enabled"), ) + ingress = IngressConfig( + enabled=self.config.get("ingress-enabled"), + enable_proxy_protocol=self.config.get("ingress-enable-proxy-protocol"), + ) + metrics_server = MetricsServerConfig(enabled=self.config.get("metrics-server-enabled")) load_balancer = LoadBalancerConfig( @@ -462,6 +468,7 @@ def _assemble_cluster_config(self) -> UserFacingClusterConfig: cloud_provider=cloud_provider, dns_config=dns_config, gateway=gateway, + ingress=ingress, local_storage=local_storage, load_balancer=load_balancer, metrics_server=metrics_server, diff --git a/charms/worker/k8s/tests/unit/test_config_options.py b/charms/worker/k8s/tests/unit/test_config_options.py index 13bb4f49..7a12be6c 100644 --- a/charms/worker/k8s/tests/unit/test_config_options.py +++ b/charms/worker/k8s/tests/unit/test_config_options.py @@ -50,3 +50,25 @@ def test_configure_network_options(harness): harness.update_config({"network-enabled": True}) ufcg = harness.charm._assemble_cluster_config() assert ufcg.network.enabled, "Network should be enabled" + + +def test_configure_ingress_options(harness): + """Test configuring the ingress options. 
+ + Args: + harness: the harness under test + """ + if harness.charm.is_worker: + pytest.skip("Not applicable on workers") + + harness.disable_hooks() + + enabled = True + proxy_protocol_enabled = True + + harness.update_config({"ingress-enabled": enabled}) + harness.update_config({"ingress-enable-proxy-protocol": proxy_protocol_enabled}) + + ufcg = harness.charm._assemble_cluster_config() + assert ufcg.ingress.enabled == enabled + assert ufcg.ingress.enable_proxy_protocol == proxy_protocol_enabled From 8320b91d4a6b7b5c238cd47d3be63ff990823bbb Mon Sep 17 00:00:00 2001 From: Adam Dyess Date: Fri, 22 Nov 2024 16:42:15 -0600 Subject: [PATCH 6/7] Reduce machine count for running ceph tests (#186) --- tests/integration/conftest.py | 4 +++- tests/integration/data/test-bundle-ceph.yaml | 12 ++---------- tests/integration/test_ceph.py | 5 +---- 3 files changed, 6 insertions(+), 15 deletions(-) diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index f4a147a6..7ec676be 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -241,7 +241,9 @@ def switch(self, name: str, path: Optional[Path] = None, channel: Optional[str] Raises: ValueError: if both path and channel are provided, or neither are provided """ - app = self.applications[name] + app = self.applications.get(name) + if not app: + return # Skip if the application is not in the bundle if (not path and not channel) or (path and channel): raise ValueError("channel and path are mutually exclusive") if path: diff --git a/tests/integration/data/test-bundle-ceph.yaml b/tests/integration/data/test-bundle-ceph.yaml index 37d4c912..4f93361e 100644 --- a/tests/integration/data/test-bundle-ceph.yaml +++ b/tests/integration/data/test-bundle-ceph.yaml @@ -11,13 +11,6 @@ applications: channel: latest/edge constraints: cores=2 mem=8G root-disk=16G num_units: 1 - options: - bootstrap-node-taints: "node-role.kubernetes.io/control-plane=:NoSchedule" - k8s-worker: - charm: k8s-worker - channel: latest/edge - constraints: cores=2 mem=8G root-disk=16G - num_units: 1 ceph-csi: charm: ceph-csi channel: latest/stable @@ -30,17 +23,16 @@ applications: num_units: 1 options: monitor-count: 1 - expected-osd-count: 1 + expected-osd-count: 2 ceph-osd: charm: ceph-osd channel: quincy/stable constraints: cores=2 mem=4G root-disk=16G - num_units: 3 + num_units: 2 storage: osd-devices: 1G,1 osd-journals: 1G,1 relations: - - [k8s, k8s-worker:cluster] - [ceph-csi, k8s:ceph-k8s-info] - [ceph-csi, ceph-mon:client] - [ceph-mon, ceph-osd:mon] diff --git a/tests/integration/test_ceph.py b/tests/integration/test_ceph.py index 61bcb39f..ae1a4732 100644 --- a/tests/integration/test_ceph.py +++ b/tests/integration/test_ceph.py @@ -15,10 +15,7 @@ # This pytest mark configures the test environment to use the Canonical Kubernetes # bundle with ceph, for all the test within this module. 
-pytestmark = [ - pytest.mark.bundle_file("test-bundle-ceph.yaml"), - pytest.mark.ignore_blocked, -] +pytestmark = [pytest.mark.bundle_file("test-bundle-ceph.yaml")] def _get_data_file_path(name) -> str: From 69c25661a411220fc3e7392ecf7e35019dfad7ad Mon Sep 17 00:00:00 2001 From: Mateo Florido Date: Fri, 22 Nov 2024 18:30:52 -0500 Subject: [PATCH 7/7] Fix Upgrade Check and Literals (#185) Co-authored-by: Adam Dyess --- charms/worker/k8s/src/inspector.py | 23 +++++++++++++++---- charms/worker/k8s/src/literals.py | 4 ++-- charms/worker/k8s/src/upgrade.py | 2 +- .../worker/k8s/tests/unit/test_inspector.py | 5 ++-- charms/worker/k8s/tests/unit/test_upgrade.py | 8 ++++--- 5 files changed, 29 insertions(+), 13 deletions(-) diff --git a/charms/worker/k8s/src/inspector.py b/charms/worker/k8s/src/inspector.py index 1b3f1a18..783b5ea1 100644 --- a/charms/worker/k8s/src/inspector.py +++ b/charms/worker/k8s/src/inspector.py @@ -57,14 +57,27 @@ def get_nodes(self, labels: LabelSelector) -> Optional[List[Node]]: ClusterInspectorError: If the nodes cannot be retrieved. """ client = self._get_client() - unready_nodes = [] try: - for node in client.list(Node, labels=labels): - if node.status != "Ready": - unready_nodes.append(node) + + def is_node_not_ready(node: Node) -> bool: + """Check if a node is not ready. + + Args: + node: The node to check. + + Returns: + True if the node is not ready, False otherwise. + """ + if not node.status or not node.status.conditions: + return True + return any( + condition.type == "Ready" and condition.status != "True" + for condition in node.status.conditions + ) + + return [node for node in client.list(Node, labels=labels) if is_node_not_ready(node)] except ApiError as e: raise ClusterInspector.ClusterInspectorError(f"Failed to get nodes: {e}") from e - return unready_nodes or None def verify_pods_running(self, namespaces: List[str]) -> Optional[str]: """Verify that all pods in the specified namespaces are running. diff --git a/charms/worker/k8s/src/literals.py b/charms/worker/k8s/src/literals.py index 656f95af..950b9edd 100644 --- a/charms/worker/k8s/src/literals.py +++ b/charms/worker/k8s/src/literals.py @@ -13,9 +13,9 @@ }, # NOTE: Update the dependencies for the k8s-service before releasing. "k8s_service": { - "dependencies": {"k8s-worker": "^1.31.0"}, + "dependencies": {"k8s-worker": "^1.30, < 1.32"}, "name": "k8s", - "upgrade_supported": "^1.30.0", + "upgrade_supported": "^1.30, < 1.32", "version": "1.31.2", }, } diff --git a/charms/worker/k8s/src/upgrade.py b/charms/worker/k8s/src/upgrade.py index fc2548f6..4c8efef1 100644 --- a/charms/worker/k8s/src/upgrade.py +++ b/charms/worker/k8s/src/upgrade.py @@ -66,7 +66,7 @@ def pre_upgrade_check(self) -> None: if unready_nodes: raise ClusterNotReadyError( message="Cluster is not ready for an upgrade", - cause=f"Nodes not ready: {', '.join(unready_nodes)}", + cause=f"Nodes not ready: {', '.join(node.metadata.name for node in unready_nodes)}", resolution="""Node(s) may be in a bad state. 
Please check the node(s) for more information.""", ) diff --git a/charms/worker/k8s/tests/unit/test_inspector.py b/charms/worker/k8s/tests/unit/test_inspector.py index 2e532095..cb3c76eb 100644 --- a/charms/worker/k8s/tests/unit/test_inspector.py +++ b/charms/worker/k8s/tests/unit/test_inspector.py @@ -10,6 +10,7 @@ from inspector import ClusterInspector from lightkube.core.exceptions import ApiError +from lightkube.models.core_v1 import NodeCondition from lightkube.resources.core_v1 import Node, Pod @@ -25,11 +26,11 @@ def setUp(self): def test_get_nodes_returns_unready(self): """Test that get_nodes returns unready nodes.""" mock_node1 = MagicMock(spec=Node) - mock_node1.status = "Ready" + mock_node1.status.conditions = [NodeCondition(type="Ready", status="True")] mock_node1.metadata.name = "node1" mock_node2 = MagicMock(spec=Node) - mock_node2.status = "NotReady" + mock_node2.status.conditions = [NodeCondition(type="Ready", status="False")] mock_node2.metadata.name = "node2" self.mock_client.list.return_value = [mock_node1, mock_node2] diff --git a/charms/worker/k8s/tests/unit/test_upgrade.py b/charms/worker/k8s/tests/unit/test_upgrade.py index edeb5003..60e66bb4 100644 --- a/charms/worker/k8s/tests/unit/test_upgrade.py +++ b/charms/worker/k8s/tests/unit/test_upgrade.py @@ -8,6 +8,8 @@ from charms.data_platform_libs.v0.upgrade import ClusterNotReadyError from inspector import ClusterInspector +from lightkube.models.core_v1 import Node +from lightkube.models.meta_v1 import ObjectMeta from upgrade import K8sDependenciesModel, K8sUpgrade @@ -66,9 +68,9 @@ def test_pre_upgrade_check_unready_nodes(self): """Test pre_upgrade_check fails when nodes are not ready.""" self.charm.is_worker = True self.node_manager.get_nodes.return_value = [ - "worker-1", - "worker-2", - "worker-3", + Node(metadata=ObjectMeta(name="worker-1")), + Node(metadata=ObjectMeta(name="worker-2")), + Node(metadata=ObjectMeta(name="worker-3")), ] with self.assertRaises(ClusterNotReadyError):
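
The patches above add load-balancer, DNS, ingress and metrics-server options to _assemble_cluster_config, but only the ingress change (patch 5) ships a matching unit test. Below is a minimal sketch of an analogous test for the new load-balancer options. It reuses the `harness` fixture and worker-skip pattern from charms/worker/k8s/tests/unit/test_config_options.py and assumes UserFacingClusterConfig exposes the load_balancer field that patch 3 passes to its constructor; the test name and the configured values are illustrative, not part of the patch series.

import pytest


def test_configure_load_balancer_options(harness):
    """Sketch: set the new load-balancer options and inspect the assembled config.

    Args:
        harness: the harness under test
    """
    if harness.charm.is_worker:
        pytest.skip("Not applicable on workers")

    harness.disable_hooks()

    # Config keys introduced in patch 3 (charmcraft.yaml); values are examples only.
    harness.update_config(
        {
            "load-balancer-enabled": True,
            "load-balancer-cidrs": "192.0.2.0/24",
            "load-balancer-l2-mode": True,
        }
    )

    ufcg = harness.charm._assemble_cluster_config()
    assert ufcg.load_balancer.enabled, "Load balancer should be enabled"
    # _assemble_cluster_config splits the space-separated CIDR string into a list.
    assert ufcg.load_balancer.cidrs == ["192.0.2.0/24"]
    assert ufcg.load_balancer.l2_mode, "L2 mode should be enabled"

If adopted, this would sit alongside the network and ingress tests already present in test_config_options.py and run with the existing unit-test tooling.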