Skip to content

Commit

Permalink
Improve status message when pebble is not ready (#325)
Browse files Browse the repository at this point in the history
* Improve status message when pebble is not ready

This commit changes the status message when pebble is not
read to "Configuring Prometheus" instead of "Waiting for pebble
ready" because the latter is not meaningful to the system
adminstrator. Also the status has been changed from WaitingStatus to
MaintenanceStatus.

Closes #267 

* Integration tests are now specific when adding relations

Recent commits added self monitoring relations to charms in the
observability charms themselves. As a result of this observability
charms that can now have more than one relation type with the Prometheus
charm. Hence adding or removing relations in integration tests needs to
be very specific about which relation type is being added or removed.
Without this the integration tests fail due to ambiguous relation.
This commit fixes these issues.

Closes #326

* Fix scaling parameter argument

This commit fixes a minor bug. The Python libjuju Scaling API used
set scale change to zero rather than scale to zero.
  • Loading branch information
Balbir Thomas authored Jul 8, 2022
1 parent c0f8d61 commit a66e301
Show file tree
Hide file tree
Showing 8 changed files with 51 additions and 21 deletions.
4 changes: 2 additions & 2 deletions src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
from lightkube.resources.core_v1 import PersistentVolumeClaim, Pod
from ops.charm import ActionEvent, CharmBase
from ops.main import main
from ops.model import ActiveStatus, BlockedStatus, WaitingStatus
from ops.model import ActiveStatus, BlockedStatus, MaintenanceStatus
from ops.pebble import ChangeError, ExecError, Layer

from prometheus_server import Prometheus
Expand Down Expand Up @@ -125,7 +125,7 @@ def _configure(self, _):
container = self.unit.get_container(self._name)

if not container.can_connect():
self.unit.status = WaitingStatus("Waiting for Pebble ready")
self.unit.status = MaintenanceStatus("Configuring Prometheus")
return

# push Prometheus config file to workload
Expand Down
4 changes: 3 additions & 1 deletion tests/integration/test_charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,9 @@ async def test_prometheus_scrape_relation_with_prometheus_tester(
get_prometheus_rules(ops_test, prometheus_app_name, 0),
)

await ops_test.model.add_relation(prometheus_app_name, tester_app_name)
await ops_test.model.add_relation(
f"{prometheus_app_name}:metrics-endpoint", f"{tester_app_name}:metrics-endpoint"
)
await ops_test.model.wait_for_idle(apps=app_names, status="active")

config_with_relation = await get_prometheus_config(ops_test, prometheus_app_name, 0)
Expand Down
8 changes: 6 additions & 2 deletions tests/integration/test_check_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,9 @@ async def test_good_config_validates_successfully(
await ops_test.model.wait_for_idle(apps=[prometheus_app_name, scrape_tester], status="active")
await check_prometheus_is_ready(ops_test, prometheus_app_name, 0)

await ops_test.model.add_relation(prometheus_app_name, scrape_tester)
await ops_test.model.add_relation(
f"{prometheus_app_name}:metrics-endpoint", f"{scrape_tester}:metrics-endpoint"
)

# set some custom configs to later check they persisted across the test
action = (
Expand Down Expand Up @@ -86,7 +88,9 @@ async def test_bad_config_sets_action_results(ops_test, prometheus_charm, promet
await ops_test.model.wait_for_idle(apps=[scrape_shim, bad_scrape_tester])

await asyncio.gather(
ops_test.model.add_relation(bad_scrape_tester, scrape_shim),
ops_test.model.add_relation(
f"{bad_scrape_tester}:metrics-endpoint", f"{scrape_shim}:configurable-scrape-jobs"
),
ops_test.model.add_relation(
f"{prometheus_app_name}:metrics-endpoint", f"{scrape_shim}:metrics-endpoint"
),
Expand Down
12 changes: 9 additions & 3 deletions tests/integration/test_deduplication.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,9 @@ async def test_multiple_scrape_jobs_in_constructor(
config={"scrape_jobs": json.dumps(jobs)},
),
)
await ops_test.model.add_relation(prometheus_app_name, tester_app_name)
await ops_test.model.add_relation(
f"{prometheus_app_name}:metrics-endpoint", f"{tester_app_name}:metrics-endpoint"
)
await ops_test.model.wait_for_idle(status="active")

targets = await get_prometheus_active_targets(ops_test, prometheus_app_name)
Expand All @@ -70,7 +72,11 @@ async def test_same_app_related_two_ways(
),
)
await asyncio.gather(
ops_test.model.add_relation(prometheus_app_name, "scrape-config:metrics-endpoint"),
ops_test.model.add_relation("scrape-config", tester_app_name),
ops_test.model.add_relation(
f"{prometheus_app_name}:metrics-endpoint", "scrape-config:metrics-endpoint"
),
ops_test.model.add_relation(
"scrape-config:configurable-scrape-jobs", f"{tester_app_name}:metrics-endpoint"
),
)
await ops_test.model.wait_for_idle(status="active")
4 changes: 3 additions & 1 deletion tests/integration/test_ingress_with_traefik_k8s.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,9 @@ async def test_ingress_traefik_k8s(ops_test, prometheus_charm):
assert initial_workload_is_ready(ops_test, apps)
assert await check_prometheus_is_ready(ops_test, prometheus_name, 0)

await ops_test.model.add_relation(traefik_name, f"{prometheus_name}:ingress")
await ops_test.model.add_relation(
f"{traefik_name}:ingress-per-unit", f"{prometheus_name}:ingress"
)

# Wait a little more than usual, there are various rounds of relation_changed
# to be processed.
Expand Down
12 changes: 9 additions & 3 deletions tests/integration/test_remote_write_grafana_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,9 @@ async def test_remote_write_with_grafana_agent(
ops_test.model.add_relation(
f"{prometheus_name}:receive-remote-write", f"{agent_name}:send-remote-write"
),
ops_test.model.add_relation(tester_name, agent_name),
ops_test.model.add_relation(
f"{tester_name}:metrics-endpoint", f"{agent_name}:metrics-endpoint"
),
)

# A considerable idle_period is needed to guarantee metrics show up in prometheus
Expand Down Expand Up @@ -89,7 +91,9 @@ async def test_remote_write_alerts_deduplicate(ops_test):
tester_name = "prometheus-tester"
apps = [prometheus_name, tester_name]

await ops_test.model.add_relation(tester_name, prometheus_name)
await ops_test.model.add_relation(
f"{tester_name}:metrics-endpoint", f"{prometheus_name}:metrics-endpoint"
)
await ops_test.model.wait_for_idle(apps=apps, status="active", idle_period=90)

# Make sure only one copy of the alerts is present
Expand Down Expand Up @@ -147,10 +151,12 @@ async def test_check_data_not_persist_on_scale_0(ops_test, prometheus_charm):
# the timestamp and the value itself.
num_head_chunks_before = int(total0[0]["value"][1])

await ops_test.model.applications[prometheus_app_name].scale(scale_change=0)
logger.info("Scaling down %s to zero units", prometheus_app_name)
await ops_test.model.applications[prometheus_app_name].scale(scale=0)
await ops_test.model.block_until(
lambda: len(ops_test.model.applications[prometheus_app_name].units) == 0
)
logger.info("Scaling up %s units to 1", prometheus_app_name)
await ops_test.model.applications[prometheus_app_name].scale(scale_change=1)
await ops_test.model.wait_for_idle(apps=[prometheus_app_name], status="active", timeout=120)
assert await check_prometheus_is_ready(ops_test, prometheus_app_name, 0)
Expand Down
24 changes: 16 additions & 8 deletions tests/integration/test_rerelate.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,10 +73,14 @@ async def test_build_and_deploy(ops_test: OpsTest, prometheus_charm, prometheus_
)

await asyncio.gather(
ops_test.model.add_relation(app_name, tester_app_name),
ops_test.model.add_relation(app_name, "alertmanager"),
ops_test.model.add_relation(app_name, "grafana:grafana-source"),
ops_test.model.add_relation(app_name, "grafana-agent:send-remote-write"),
ops_test.model.add_relation(
f"{app_name}:metrics-endpoint", f"{tester_app_name}:metrics-endpoint"
),
ops_test.model.add_relation(f"{app_name}:alertmanager", "alertmanager:alerting"),
ops_test.model.add_relation(f"{app_name}:grafana-source", "grafana:grafana-source"),
ops_test.model.add_relation(
f"{app_name}:receive-remote-write", "grafana-agent:send-remote-write"
),
)
await ops_test.model.wait_for_idle(status="active", timeout=600)

Expand All @@ -97,9 +101,13 @@ async def test_remove_relation(ops_test: OpsTest):
@pytest.mark.abort_on_fail
async def test_rerelate(ops_test: OpsTest):
await asyncio.gather(
ops_test.model.add_relation(app_name, tester_app_name),
ops_test.model.add_relation(app_name, "alertmanager"),
ops_test.model.add_relation(app_name, "grafana:grafana-source"),
ops_test.model.add_relation(app_name, "grafana-agent:send-remote-write"),
ops_test.model.add_relation(
f"{app_name}:metrics-endpoint", f"{tester_app_name}:metrics-endpoint"
),
ops_test.model.add_relation(f"{app_name}:alertmanager", "alertmanager:alerting"),
ops_test.model.add_relation(f"{app_name}:grafana-source", "grafana:grafana-source"),
ops_test.model.add_relation(
f"{app_name}:receive-remote-write", "grafana-agent:send-remote-write"
),
)
await ops_test.model.wait_for_idle(status="active", timeout=600)
4 changes: 3 additions & 1 deletion tests/integration/test_upgrade_charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,9 @@ async def test_deploy_from_edge_and_upgrade_from_local_path(ops_test, prometheus
)
await ops_test.model.wait_for_idle(apps=app_names, status="active", timeout=300)

await ops_test.model.add_relation(prometheus_app_name, tester_app_name)
await ops_test.model.add_relation(
f"{prometheus_app_name}:metrics-endpoint", f"{tester_app_name}:metrics-endpoint"
)
await ops_test.model.wait_for_idle(apps=app_names, status="active")
# Check only one alert rule exists
rules_with_relation = await get_prometheus_rules(ops_test, prometheus_app_name, 0)
Expand Down

0 comments on commit a66e301

Please sign in to comment.