Skip to content

Commit

Permalink
Use hostname instead of IP address (#249)
Browse files Browse the repository at this point in the history
When a pod churns, if we are using IP addresses in relation data, we need to update all the places where the address is present. If we use the fqdn, we will not need to worry about keeping relation data up to date as it will not change.


* Use hostname instead of IP address to avoid issues with pod churn. Also, make sure we update remote write to use this on charm upgrade.

* fix unit tests to work with fqdn instead of ip

* There are a number of cases where we need to update the remote write endpoint so run it in _configure
  • Loading branch information
dstathis authored Mar 22, 2022
1 parent 316f9c3 commit 522db28
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 27 deletions.
30 changes: 6 additions & 24 deletions src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import logging
import os
import re
import socket
from typing import Dict
from urllib.parse import urlparse

Expand Down Expand Up @@ -81,27 +82,13 @@ def __init__(self, *args):
self.framework.observe(self.on.prometheus_pebble_ready, self._configure)
self.framework.observe(self.on.config_changed, self._configure)
self.framework.observe(self.on.upgrade_charm, self._configure)
self.framework.observe(self.ingress.on.ingress_changed, self._on_ingress_changed)
self.framework.observe(self.ingress.on.ingress_changed, self._configure)
self.framework.observe(self.on.receive_remote_write_relation_created, self._configure)
self.framework.observe(self.on.receive_remote_write_relation_changed, self._configure)
self.framework.observe(self.on.receive_remote_write_relation_broken, self._configure)
self.framework.observe(self.metrics_consumer.on.targets_changed, self._configure)
self.framework.observe(self.alertmanager_consumer.on.cluster_changed, self._configure)

def _on_upgrade_charm(self, event):
"""Handler for the upgrade_charm event during which will update the K8s service."""
self._configure(event)

def _on_ingress_changed(self, event):
self._configure(event)
# The remote_write_provider object has the update external URL,
# as the data from the ingress relation is looked up in the Charm
# constructor, and it is already updated. However, we do need to
# trigger the update of the endpoint to the remote_write clients,
# as remote_write_provider does not observe any ingress-specific
# events.
self.remote_write_provider.update_endpoint()

def _configure(self, _):
"""Reconfigure and either reload or restart Prometheus.
Expand Down Expand Up @@ -158,6 +145,9 @@ def _configure(self, _):
):
return

# Make sure that if the remote_write endpoint changes, it is reflected in relation data.
self.remote_write_provider.update_endpoint()

self.unit.status = ActiveStatus()

def _set_alerts(self, container):
Expand Down Expand Up @@ -351,14 +341,6 @@ def _prometheus_layer(self) -> Layer:

return Layer(layer_config)

@property
def _pod_ip(self) -> str:
"""Returns the pod ip of this unit."""
if bind_address := self.model.get_binding("prometheus-peers").network.bind_address:
bind_address = str(bind_address)

return bind_address

@property
def _external_url(self) -> str:
"""Return the external hostname to be passed to ingress via the relation."""
Expand All @@ -375,7 +357,7 @@ def _external_url(self) -> str:
# are routable virtually exclusively inside the cluster (as they rely)
# on the cluster's DNS service, while the ip address is _sometimes_
# routable from the outside, e.g., when deploying on MicroK8s on Linux.
return f"http://{self._pod_ip}:{self._port}"
return f"http://{socket.getfqdn()}:{self._port}"


if __name__ == "__main__":
Expand Down
7 changes: 5 additions & 2 deletions tests/unit/test_charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# See LICENSE file for licensing details.

import json
import socket
import unittest
from unittest.mock import patch

Expand Down Expand Up @@ -68,7 +69,8 @@ def test_ingress_relation_not_set(self):
self.harness.set_leader(True)

plan = self.harness.get_container_pebble_plan("prometheus")
self.assertEqual(cli_arg(plan, "--web.external-url"), "http://1.1.1.1:9090")
fqdn = socket.getfqdn()
self.assertEqual(cli_arg(plan, "--web.external-url"), f"http://{fqdn}:9090")

@patch_network_get(private_address="1.1.1.1")
def test_ingress_relation_set(self):
Expand All @@ -78,7 +80,8 @@ def test_ingress_relation_set(self):
self.harness.add_relation_unit(rel_id, "traefik-ingress/0")

plan = self.harness.get_container_pebble_plan("prometheus")
self.assertEqual(cli_arg(plan, "--web.external-url"), "http://1.1.1.1:9090")
fqdn = socket.getfqdn()
self.assertEqual(cli_arg(plan, "--web.external-url"), f"http://{fqdn}:9090")

@patch_network_get(private_address="1.1.1.1")
def test_web_external_url_has_precedence_over_ingress_relation(self):
Expand Down
3 changes: 2 additions & 1 deletion tests/unit/test_remote_write.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@ def setUp(self, *unused):

@patch.object(KubernetesServicePatch, "_service_object", new=lambda *args: None)
@patch.object(Prometheus, "reload_configuration", new=lambda _: True)
@patch("socket.getfqdn", new=lambda *args: "fqdn")
@patch_network_get(private_address="1.1.1.1")
def test_port_is_set(self, *unused):
self.harness.begin_with_initial_hooks()
Expand All @@ -171,7 +172,7 @@ def test_port_is_set(self, *unused):
self.harness.add_relation_unit(rel_id, "consumer/0")
self.assertEqual(
self.harness.get_relation_data(rel_id, self.harness.charm.unit.name),
{"remote_write": json.dumps({"url": "http://1.1.1.1:9090/api/v1/write"})},
{"remote_write": json.dumps({"url": "http://fqdn:9090/api/v1/write"})},
)
self.assertIsInstance(self.harness.charm.unit.status, ActiveStatus)

Expand Down

0 comments on commit 522db28

Please sign in to comment.