From bf15cd4c7333b99e3e4d92da6377c257495c15d8 Mon Sep 17 00:00:00 2001 From: Balbir Thomas Date: Fri, 22 Jul 2022 14:24:57 +0100 Subject: [PATCH] Metrics Provider uses bind address by default (#331) * Metrics Provider uses bind address by default This commit ensures MetricsEndpointProvider uses bind_address rather than socket.fqdn() by default and only falls back to fqdn() if bind address is not valid. This change was made because it was found that in some cases fqdn() did not return resolvable addresses for example if MetricsEndpointProvider charm was in a LXD could on localhost and MetricsEndpointProvider was on a Microk8s cloud on the same host. * Replace Multiline conditional with single line Co-authored-by: Leon <82407168+sed-i@users.noreply.github.com> Co-authored-by: Leon <82407168+sed-i@users.noreply.github.com> --- .../prometheus_k8s/v0/prometheus_scrape.py | 6 +++- tests/unit/test_endpoint_provider.py | 29 +++++++++++++++---- 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/lib/charms/prometheus_k8s/v0/prometheus_scrape.py b/lib/charms/prometheus_k8s/v0/prometheus_scrape.py index ae172f9f..f73d2edc 100644 --- a/lib/charms/prometheus_k8s/v0/prometheus_scrape.py +++ b/lib/charms/prometheus_k8s/v0/prometheus_scrape.py @@ -1553,7 +1553,11 @@ def _set_unit_ip(self, _): event is actually needed. """ for relation in self._charm.model.relations[self._relation_name]: - relation.data[self._charm.unit]["prometheus_scrape_unit_address"] = socket.getfqdn() + unit_ip = str(self._charm.model.get_binding(relation).network.bind_address) + relation.data[self._charm.unit]["prometheus_scrape_unit_address"] = ( + unit_ip if self._is_valid_unit_address(unit_ip) else socket.getfqdn() + ) + relation.data[self._charm.unit]["prometheus_scrape_unit_name"] = str( self._charm.model.unit.name ) diff --git a/tests/unit/test_endpoint_provider.py b/tests/unit/test_endpoint_provider.py index b67d95db..fd3ff878 100644 --- a/tests/unit/test_endpoint_provider.py +++ b/tests/unit/test_endpoint_provider.py @@ -20,7 +20,7 @@ RelationRoleMismatchError, ) from deepdiff import DeepDiff -from helpers import TempFolderSandbox +from helpers import TempFolderSandbox, patch_network_get from ops.charm import CharmBase from ops.framework import StoredState from ops.testing import Harness @@ -162,6 +162,7 @@ def test_provider_default_scrape_relation_wrong_role(self): ) self.assertRaises(RelationRoleMismatchError, harness.begin) + @patch_network_get() def test_provider_sets_scrape_metadata(self): rel_id = self.harness.add_relation(RELATION_NAME, "provider") self.harness.add_relation_unit(rel_id, "provider/0") @@ -241,23 +242,34 @@ def test_provider_can_refresh_on_multiple_events(self, mock_set_unit_ip): harness.container_pebble_ready("prometheus-tester") self.assertEqual(mock_set_unit_ip.call_count, 2) - @patch("socket.getfqdn", new=lambda *args: "fqdn1") + @patch_network_get() def test_provider_unit_sets_address_on_pebble_ready(self): rel_id = self.harness.add_relation(RELATION_NAME, "provider") self.harness.container_pebble_ready("prometheus-tester") data = self.harness.get_relation_data(rel_id, self.harness.charm.unit.name) self.assertIn("prometheus_scrape_unit_address", data) - self.assertEqual(data["prometheus_scrape_unit_address"], "fqdn1") + self.assertEqual(data["prometheus_scrape_unit_address"], "10.1.157.116") - @patch("socket.getfqdn", new=lambda *args: "fqdn2") + @patch_network_get() def test_provider_unit_sets_address_on_relation_joined(self): rel_id = self.harness.add_relation(RELATION_NAME, "provider") self.harness.add_relation_unit(rel_id, "provider/0") data = self.harness.get_relation_data(rel_id, self.harness.charm.unit.name) self.assertIn("prometheus_scrape_unit_address", data) - self.assertEqual(data["prometheus_scrape_unit_address"], "fqdn2") + self.assertEqual(data["prometheus_scrape_unit_address"], "10.1.157.116") self.assertIn("prometheus_scrape_unit_name", data) + @patch("socket.getfqdn", new=lambda *args: "some.host") + @patch_network_get(private_address=None) + def test_provider_unit_sets_fqdn_if_not_address_on_relation_joined(self): + rel_id = self.harness.add_relation(RELATION_NAME, "provider") + self.harness.add_relation_unit(rel_id, "provider/0") + data = self.harness.get_relation_data(rel_id, self.harness.charm.unit.name) + self.assertIn("prometheus_scrape_unit_address", data) + self.assertEqual(data["prometheus_scrape_unit_address"], "some.host") + self.assertIn("prometheus_scrape_unit_name", data) + + @patch_network_get() def test_provider_supports_multiple_jobs(self): rel_id = self.harness.add_relation(RELATION_NAME, "provider") self.harness.add_relation_unit(rel_id, "provider/0") @@ -269,6 +281,7 @@ def test_provider_supports_multiple_jobs(self): job_names = [job["job_name"] for job in JOBS] self.assertListEqual(names, job_names) + @patch_network_get() def test_provider_sanitizes_jobs(self): rel_id = self.harness.add_relation(RELATION_NAME, "provider") self.harness.add_relation_unit(rel_id, "provider/0") @@ -279,6 +292,7 @@ def test_provider_sanitizes_jobs(self): keys = set(job.keys()) self.assertTrue(keys.issubset(ALLOWED_KEYS)) + @patch_network_get() def test_each_alert_rule_is_topology_labeled(self): rel_id = self.harness.add_relation(RELATION_NAME, "provider") self.harness.add_relation_unit(rel_id, "provider/0") @@ -310,6 +324,7 @@ def test_each_alert_rule_is_topology_labeled(self): self.assertIn("juju_unit", rule["labels"]) self.assertIn("juju_unit=", rule["expr"]) + @patch_network_get() def test_each_alert_expression_is_topology_labeled(self): rel_id = self.harness.add_relation(RELATION_NAME, "provider") self.harness.add_relation_unit(rel_id, "provider/0") @@ -360,6 +375,7 @@ def setup(self, **kwargs): self.harness.set_leader(True) self.harness.begin() + @patch_network_get() def test_a_bad_alert_expression_logs_an_error(self): self.setup(alert_rules_path="./tests/unit/bad_alert_expressions") @@ -370,6 +386,7 @@ def test_a_bad_alert_expression_logs_an_error(self): self.assertEqual(len(messages), 1) self.assertIn("Invalid rules file: missing_expr.rule", messages[0]) + @patch_network_get() def test_a_bad_alert_rules_logs_an_error(self): self.setup(alert_rules_path="./tests/unit/bad_alert_rules") @@ -693,6 +710,7 @@ def setup(self, **kwargs): self.harness.set_leader(True) self.harness.begin() + @patch_network_get() def test_unit_label_is_retained_if_hard_coded(self): self.setup(alert_rules_path="./tests/unit/alert_rules_with_unit_topology") rel_id = self.harness.add_relation("metrics-endpoint", "provider") @@ -721,6 +739,7 @@ def setUp(self): self.harness.set_leader(False) self.harness.begin_with_initial_hooks() + @patch_network_get() def test_alert_rules(self): """Verify alert rules are added when leader is elected after the relation is created.""" rel_id = self.harness.add_relation(RELATION_NAME, "provider")