Skip to content

Commit

Permalink
fix: handle invalid combination of external_hostname and routing_path (
Browse files Browse the repository at this point in the history
…#420)

Previously, this charm accepted the following configuration:
* routing_mode=subdomain
* external_hostname="" # (unset)

When external_hostname is unset, the url provided for any application related by ingress uses the LoadBalancer's external address, which may be an IP.  In these cases, it would provide charms urls like `model.app.1.2.3.4`, which are invalid URLs (the last segment of a URL cannot be all-numeric).  This led to an uncaught pydantic validation error when calling `ipa.publish_url()` because that method includes validation of the URL, putting the charm in Error state.  An example of this is shown in #361

Ideally, the fix here would be to validate the charm configuration+LoadBalancer details and halt charm execution if the configuration was invalid, putting the charm into BlockedStatus until resolved.  The problem is that the current architecture of this charm makes that solution challenging.  The charm is designed to atomically handle events (doing only the work a particular event needs) rather than holistically (recomputing the world on each event), meaning that skipping or losing track of events leads to undesired charm states.  Also, `ipa.publish_url()` is called deep in most (all?) event handlers, making it difficult to properly handle these errors at the charm level without a major refactor of the charm.

As a compromise, the following has been done:
* the traefik_k8s/v2/ingress.py library's `publish_url` method has been updated to catch the pydantic validation error cited in #361 and log it rather than raise it to the charm.  The library then writes ingress=None to the databag instead of the invalid URL, giving a soft indication to the user that the url is invalid.
* the config.yaml descriptions for routing_mode and external_hostname have been updated to explain the incompatibility in these settings
* config validation has been added to the __init__ of the charm for routing_mode and external_hostname.  If routing_mode==subdomain and hostname is unset, the charm will log warnings for the user about the possible incompatibility (but will not block the charm)

The upshot of these changes is that this charm will:
* not go into an unresponsive error state
* as best it can given the current charm architecture, warn the user of the misconfiguration
* not risk losing events through defer or getting event sequencing wrong

Fixes #361
  • Loading branch information
ca-scribner authored Nov 15, 2024
1 parent 20d3656 commit f824025
Show file tree
Hide file tree
Showing 6 changed files with 160 additions and 16 deletions.
3 changes: 3 additions & 0 deletions config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@ options:
will provide to the unit `my-unit/2` in the `my-model` model the following URL:
`http://my-model-my-unit-2.foo:8080`
Note that, for 'subdomain' routing mode, the external_hostname must be set and not be set to an IP address. This
is because subdomains are not supported for IP addresses.
type: string
default: path

Expand Down
40 changes: 34 additions & 6 deletions lib/charms/traefik_k8s/v2/ingress.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,18 @@ def _on_ingress_revoked(self, event: IngressPerAppRevokedEvent):
import typing
from dataclasses import dataclass
from functools import partial
from typing import Any, Callable, Dict, List, MutableMapping, Optional, Sequence, Tuple, Union
from typing import (
Any,
Callable,
Dict,
List,
MutableMapping,
Optional,
Sequence,
Tuple,
Union,
cast,
)

import pydantic
from ops.charm import CharmBase, RelationBrokenEvent, RelationEvent
Expand Down Expand Up @@ -226,7 +237,7 @@ class IngressUrl(BaseModel):
class IngressProviderAppData(DatabagModel):
"""Ingress application databag schema."""

ingress: IngressUrl
ingress: Optional[IngressUrl] = None


class ProviderSchema(BaseModel):
Expand Down Expand Up @@ -558,7 +569,16 @@ def _published_url(self, relation: Relation) -> Optional["IngressProviderAppData
def publish_url(self, relation: Relation, url: str):
"""Publish to the app databag the ingress url."""
ingress_url = {"url": url}
IngressProviderAppData(ingress=ingress_url).dump(relation.data[self.app]) # type: ignore
try:
IngressProviderAppData(ingress=ingress_url).dump(relation.data[self.app]) # type: ignore
except pydantic.ValidationError as e:
# If we cannot validate the url as valid, publish an empty databag and log the error.
log.error(f"Failed to validate ingress url '{url}' - got ValidationError {e}")
log.error(
"url was not published to ingress relation for {relation.app}. This error is likely due to an"
" error or misconfiguration of the charm calling this library."
)
IngressProviderAppData(ingress=None).dump(relation.data[self.app]) # type: ignore

@property
def proxied_endpoints(self) -> Dict[str, Dict[str, str]]:
Expand Down Expand Up @@ -596,10 +616,14 @@ def proxied_endpoints(self) -> Dict[str, Dict[str, str]]:
if not ingress_data:
log.warning(f"relation {ingress_relation} not ready yet: try again in some time.")
continue

# Validation above means ingress cannot be None, but type checker doesn't know that.
ingress = ingress_data.ingress
ingress = cast(IngressProviderAppData, ingress)
if PYDANTIC_IS_V1:
results[ingress_relation.app.name] = ingress_data.ingress.dict()
results[ingress_relation.app.name] = ingress.dict()
else:
results[ingress_relation.app.name] = ingress_data.ingress.model_dump(mode="json")
results[ingress_relation.app.name] = ingress.model_dump(mode="json")
return results


Expand Down Expand Up @@ -834,7 +858,11 @@ def _get_url_from_relation_data(self) -> Optional[str]:
if not databag: # not ready yet
return None

return str(IngressProviderAppData.load(databag).ingress.url)
ingress = IngressProviderAppData.load(databag).ingress
if ingress is None:
return None

return str(ingress.url)

@property
def url(self) -> Optional[str]:
Expand Down
64 changes: 63 additions & 1 deletion src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import itertools
import json
import logging
import re
import socket
from typing import Any, Dict, List, Optional, Tuple, Union, cast
from urllib.parse import urlparse
Expand Down Expand Up @@ -126,6 +127,20 @@ class TraefikIngressCharm(CharmBase):
def __init__(self, *args):
super().__init__(*args)

# Before doing anything, validate the charm config. If configuration is invalid, warn the user.
# FIXME: Invalid configuration here SHOULD halt the charm's operation until resolution, or at least make a
# persistent BlockedStatus, but this charm handles events atomically rather than holistically.
# This means that skipping events will result in unexpected issues, so if we halt the charm here we must
# ensure the charm processes all backlogged events on resumption in the original order. Rather than do that
# and risk losing an event, we simply warn the user and continue functioning as best as possible. The charm
# will operate correctly except that it will not publish ingress urls to related applications, instead
# leaving the ingress relation data empty and logging an error.
# If we refactor this charm to handle events holistically (and thus we can skip events without issue), we
# should refactor this validation truly halt the charm.
# If we refactor this charm to use collect_unit_status, we could raise a persistent BlockedStatus message when
# this configuration is invalid.
self._validate_config()

self._stored.set_default(
config_hash=None,
)
Expand Down Expand Up @@ -1186,13 +1201,60 @@ def server_cert_sans_dns(self) -> List[str]:
# If all else fails, we'd rather use the bare IP
return [target] if target else []

def _validate_config(self):
"""Validate the charm configuration, emitting warning messages on misconfigurations.
In scope for this validation is:
* validating the combination of external_hostname and routing_mode
"""
# FIXME: This will false positive in cases where the LoadBalancer provides an external host rather than an IP.
# The warning will occur, but the charm will function normally. We could better validate the LoadBalancer if
# we want to avoid this, but it probably isn't worth the effort until someone notices.
invalid_hostname_and_routing_mode_message = (
"Likely configuration error: When using routing_mode=='subdomain', external_hostname should be "
"set. This is because when external_hostname is unset, Traefik uses the LoadBalancer's address as the "
"hostname for all provided URLS and that hostname is typically an IP address. This leads to invalid urls "
"like `model-app.1.2.3.4`. The charm will continue to operate as currently set, but will not provide urls"
" to any related applications if they would be invalid."
)

if self.config.get("routing_mode", "") == "subdomain":
# subdomain mode can only be used if an external_hostname is set and is not an IP address
external_hostname = self.config.get("external_hostname", "")
if not isinstance(external_hostname, str) or not is_valid_hostname(external_hostname):
logger.warning(invalid_hostname_and_routing_mode_message)


def is_valid_hostname(hostname: str) -> bool:
"""Check if a hostname is valid.
Modified from https://stackoverflow.com/a/33214423
"""
if len(hostname) == 0:
return False
if hostname[-1] == ".":
# strip exactly one dot from the right, if present
hostname = hostname[:-1]
if len(hostname) > 253:
return False

labels = hostname.split(".")

# the TLD must be not all-numeric
if re.match(r"[0-9]+$", labels[-1]):
return False

allowed = re.compile(r"(?!-)[a-z0-9-]{1,63}(?<!-)$", re.IGNORECASE)
return all(allowed.match(label) for label in labels)


@functools.lru_cache
def _get_loadbalancer_status(namespace: str, service_name: str) -> Optional[str]:
client = Client() # type: ignore
try:
traefik_service = client.get(Service, name=service_name, namespace=namespace)
except ApiError:
except ApiError as e:
logger.warning(f"Got ApiError when trying to get Loadbalancer status: {e}")
return None

if not (status := traefik_service.status): # type: ignore
Expand Down
8 changes: 4 additions & 4 deletions tests/scenario/conftest.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
from unittest.mock import PropertyMock, patch
from unittest.mock import patch

import pytest
from ops import pebble
from scenario import Container, Context, ExecOutput, Model, Mount

from charm import TraefikIngressCharm

MOCK_EXTERNAL_HOSTNAME = "testhostname"
MOCK_LB_ADDRESS = "1.2.3.4"


@pytest.fixture
def traefik_charm():
with patch("charm.KubernetesServicePatch"):
with patch("lightkube.core.client.GenericSyncClient"):
with patch(
"charm.TraefikIngressCharm._external_host",
PropertyMock(return_value=MOCK_EXTERNAL_HOSTNAME),
"charm._get_loadbalancer_status",
return_value=MOCK_LB_ADDRESS,
):
yield TraefikIngressCharm

Expand Down
53 changes: 52 additions & 1 deletion tests/scenario/test_ingress_per_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,10 @@
IngressRequirerUnitData,
)
from ops import CharmBase, Framework
from scenario import Context, Mount, Relation, State
from scenario import Context, Model, Mount, Relation, State

from tests.scenario._utils import create_ingress_relation
from tests.scenario.conftest import MOCK_LB_ADDRESS


@pytest.mark.parametrize(
Expand Down Expand Up @@ -301,3 +302,53 @@ def test_proxied_endpoints(
assert charm.ingress_per_appv1.proxied_endpoints["remote"]["url"]
assert charm.ingress_per_appv2.proxied_endpoints["remote"]["url"]
assert charm.ingress_per_unit.proxied_endpoints["remote/0"]["url"]


MODEL_NAME = "test-model"
UNIT_NAME = "nms"


@pytest.mark.parametrize(
"external_hostname, routing_mode, expected_local_app_data",
[
# Valid configurations
(
"foo.com",
"path",
{"ingress": json.dumps({"url": f"http://foo.com/{MODEL_NAME}-{UNIT_NAME}"})},
),
(
"foo.com",
"subdomain",
{"ingress": json.dumps({"url": f"http://{MODEL_NAME}-{UNIT_NAME}.foo.com/"})},
),
(
"",
"path",
{"ingress": json.dumps({"url": f"http://{MOCK_LB_ADDRESS}/{MODEL_NAME}-{UNIT_NAME}"})},
),
# Invalid configuration, resulting in empty local_app_data
("", "subdomain", {}),
],
)
def test_ingress_with_hostname_and_routing_mode(
external_hostname,
routing_mode,
expected_local_app_data,
traefik_ctx,
traefik_container,
tmp_path,
):
"""Tests that the ingress relation provides a URL for valid external hostname and routing mode combinations."""
ipa = create_ingress_relation(strip_prefix=True, unit_name=UNIT_NAME)
state = State(
model=Model(name=MODEL_NAME),
config={"routing_mode": routing_mode, "external_hostname": external_hostname},
containers=[traefik_container],
relations=[ipa],
leader=True,
)

# event = getattr(ipa, f"changed_event")
state_out = traefik_ctx.run("config-changed", state)
assert state_out.relations[0].local_app_data == expected_local_app_data
8 changes: 4 additions & 4 deletions tests/scenario/test_ingress_per_unit.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import pytest
from scenario import Relation, State

from tests.scenario.conftest import MOCK_EXTERNAL_HOSTNAME
from tests.scenario.conftest import MOCK_LB_ADDRESS


@pytest.mark.parametrize("leader", (True, False))
Expand Down Expand Up @@ -48,12 +48,12 @@ def test_ingress_unit_provider_request_response(
assert not local_app_data
else:
if mode == "tcp":
expected_url = f"{MOCK_EXTERNAL_HOSTNAME}:{port}"
expected_url = f"{MOCK_LB_ADDRESS}:{port}"
else:
prefix = f"{model}-{remote_unit_name.replace('/', '-')}"
if routing_mode == "path":
expected_url = f"http://{MOCK_EXTERNAL_HOSTNAME}/{prefix}"
expected_url = f"http://{MOCK_LB_ADDRESS}/{prefix}"
else:
expected_url = f"http://{prefix}.{MOCK_EXTERNAL_HOSTNAME}/"
expected_url = f"http://{prefix}.{MOCK_LB_ADDRESS}/"

assert local_app_data == {"ingress": f"{remote_unit_name}:\n url: {expected_url}\n"}

0 comments on commit f824025

Please sign in to comment.