Add a way to configure extra receivers for uncharmed applications #7

Merged Jul 1, 2024 · 4 commits · showing changes from 3 commits
14 changes: 14 additions & 0 deletions charmcraft.yaml
@@ -106,3 +106,17 @@ parts:
       - "jsonschema"
       - "opentelemetry-exporter-otlp-proto-http==1.21.0"
 
+config:
+  options:
+    always_enable_zipkin:
+      description: force enable a receiver for Tempo's 'zipkin' protocol.
+      type: boolean
+      default: false
+    always_enable_otlp_grpc:
+      description: force enable a receiver for Tempo's 'otlp_grpc' protocol.
+      type: boolean
+      default: false
+    always_enable_otlp_http:
+      description: force enable a receiver for Tempo's 'otlp_http' protocol.
+      type: boolean
+      default: false
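For context (not part of the diff): once these options land, an operator can force a receiver on at runtime with, e.g., `juju config tempo-k8s always_enable_zipkin=true` (the application name is an assumption). A minimal sketch of exercising the same toggle in a unit test with ops' Harness, assuming the charm class is TempoCharm and that it initializes cleanly under the harness:

from ops.testing import Harness

from charm import TempoCharm  # hypothetical import path

harness = Harness(TempoCharm)
harness.update_config({"always_enable_zipkin": True})
harness.begin()

# otlp_http is always enabled (charm_tracing needs it); zipkin is now forced on.
assert {"otlp_http", "zipkin"} <= harness.charm.enabled_receivers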
28 changes: 22 additions & 6 deletions src/charm.py
@@ -7,7 +7,7 @@
 import logging
 import socket
 from pathlib import Path
-from typing import Dict, List, Optional, Set, Tuple
+from typing import Dict, List, Optional, Set, Tuple, get_args
 
 import ops
 from charms.data_platform_libs.v0.s3 import S3Requirer
@@ -61,9 +61,6 @@ def __init__(self, *args):
 
         self.tempo = tempo = Tempo(
             external_host=self.hostname,
-            # we need otlp_http receiver for charm_tracing
-            # TODO add any extra receivers enabled manually via config
-            enable_receivers=["otlp_http"],
             use_tls=self.tls_available,
         )
 
@@ -250,6 +247,21 @@ def _local_ip(self) -> Optional[str]:
             )
         return None
 
+    @property
+    def enabled_receivers(self) -> Set[str]:
+        """Extra receivers enabled through config."""
+        enabled_receivers = set()
+        # otlp_http is needed by charm_tracing
+        enabled_receivers.add("otlp_http")
+        enabled_receivers.update(
+            [
+                receiver
+                for receiver in get_args(ReceiverProtocol)
+                if self.config.get(f"always_enable_{receiver}") is True
+            ]
+        )
+        return enabled_receivers
+
     ##################
     # EVENT HANDLERS #
     ##################
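The get_args call above works because ReceiverProtocol is a typing.Literal enumerating the protocol names, so its arguments line up one-to-one with the always_enable_* options. A standalone sketch (the protocol subset here is assumed):

from typing import Literal, get_args

# Assumed stand-in for the tracing lib's ReceiverProtocol Literal.
ReceiverProtocol = Literal["zipkin", "otlp_grpc", "otlp_http"]

print(get_args(ReceiverProtocol))
# ('zipkin', 'otlp_grpc', 'otlp_http')

# Each member maps onto one of the new config options:
print([f"always_enable_{r}" for r in get_args(ReceiverProtocol)])
# ['always_enable_zipkin', 'always_enable_otlp_grpc', 'always_enable_otlp_http']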
@@ -392,13 +404,14 @@ def _update_tracing_relations(self):
         self._update_tempo_cluster()
 
     def _requested_receivers(self) -> Tuple[ReceiverProtocol, ...]:
-        """List what receivers we should activate, based on the active tracing relations."""
+        """List what receivers we should activate, based on the active tracing relations and config-enabled extra receivers."""
         # we start with the sum of the requested endpoints from the requirers
         requested_protocols = set(self.tracing.requested_protocols())
 
+        # update with enabled extra receivers
+        requested_protocols.update(self.enabled_receivers)
         # and publish only those we support
         requested_receivers = requested_protocols.intersection(set(self.tempo.receiver_ports))
-        requested_receivers.update(self.tempo.enabled_receivers)
         return tuple(requested_receivers)
 
     def server_cert(self):
@@ -466,6 +479,9 @@ def _update_tempo_cluster(self):
             logger.error("skipped tempo cluster update: inconsistent state")
             return
 
+        if not self.unit.is_leader():
+            return
+
         kwargs = {}
 
         if self.tls_available:
4 changes: 2 additions & 2 deletions src/prometheus_alert_rules/tempo_workers/alerts.yaml
@@ -14,7 +14,7 @@ groups:
     for: 15m
     labels:
       severity: critical
-  - alert: TempoRequestErrors
+  - alert: TempoWorkerRequestErrors
     annotations:
       message: |
         The route {{ $labels.route }} in {{ $labels.cluster }}/{{ $labels.namespace }} is experiencing {{ printf "%.2f" $value }}% errors.
@@ -27,7 +27,7 @@ groups:
     for: 15m
     labels:
      severity: critical
-  - alert: TempoRequestLatency
+  - alert: TempoWorkerRequestLatency
     annotations:
       message: |
         {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}s 99th percentile latency.
2 changes: 0 additions & 2 deletions src/tempo.py
@@ -68,14 +68,12 @@ class Tempo:
     def __init__(
         self,
         external_host: Optional[str] = None,
-        enable_receivers: Optional[Sequence[ReceiverProtocol]] = None,
         use_tls: bool = False,
     ):
         # ports source: https://github.com/grafana/tempo/blob/main/example/docker-compose/local/docker-compose.yaml
 
         # fqdn, if an ingress is not available, else the ingress address.
         self._external_hostname = external_host or socket.getfqdn()
-        self.enabled_receivers = enable_receivers or []
         self.use_tls = use_tls
 
     @property
30 changes: 30 additions & 0 deletions tests/integration/helpers.py
@@ -1,12 +1,16 @@
 import logging
+import subprocess
 from dataclasses import dataclass
+from typing import Dict
 
 import yaml
 from pytest_operator.plugin import OpsTest
 
 _JUJU_DATA_CACHE = {}
 _JUJU_KEYS = ("egress-subnets", "ingress-address", "private-address")
 
+logger = logging.getLogger(__name__)
+
 
 def purge(data: dict):
     for key in _JUJU_KEYS:
@@ -157,3 +161,29 @@ def get_relation_data(
         requirer_endpoint, provider_endpoint, include_default_juju_keys, model
     )
     return RelationData(provider=provider_data, requirer=requirer_data)
+
+
+async def deploy_literal_bundle(ops_test: OpsTest, bundle: str):
+    run_args = [
+        "juju",
+        "deploy",
+        "--trust",
+        "-m",
+        ops_test.model_name,
+        str(ops_test.render_bundle(bundle)),
+    ]
+
+    retcode, stdout, stderr = await ops_test.run(*run_args)
+    assert retcode == 0, f"Deploy failed: {(stderr or stdout).strip()}"
+    logger.info(stdout)
+
+
+async def run_command(model_name: str, app_name: str, unit_num: int, command: list) -> bytes:
+    cmd = ["juju", "ssh", "--model", model_name, f"{app_name}/{unit_num}", *command]
+    try:
+        res = subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+        logger.info(res)
+    except subprocess.CalledProcessError as e:
+        logger.error(e.stdout.decode())
+        raise e
+    return res.stdout
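A sketch of how a test could combine the two new helpers (the bundle contents, charm and application names are all assumptions):

import textwrap

async def test_smoke(ops_test: OpsTest):
    bundle = textwrap.dedent(
        """
        bundle: kubernetes
        applications:
          tempo:
            charm: tempo-k8s
            channel: edge
            scale: 1
        """
    )
    await deploy_literal_bundle(ops_test, bundle)  # renders, then `juju deploy`s it
    out = await run_command(ops_test.model_name, "tempo", 0, ["hostname"])
    assert out.strip()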
11 changes: 4 additions & 7 deletions tests/integration/test_charm.py
@@ -14,7 +14,7 @@
 
 logger = logging.getLogger(__name__)
 
-METADATA = yaml.safe_load(Path("./metadata.yaml").read_text())
+METADATA = yaml.safe_load(Path("./charmcraft.yaml").read_text())
 mc = SimpleNamespace(name="mc")
 
 
@@ -33,9 +33,6 @@ async def test_build_and_deploy(ops_test: OpsTest):
         {mc.name}:
           charm: {charm}
           trust: true
-          resources:
-            nginx-image: {METADATA["resources"]["nginx-image"]["upstream-source"]}
-            nginx-prometheus-exporter-image: {METADATA["resources"]["nginx-prometheus-exporter-image"]["upstream-source"]}
           scale: 1
         loki:
          charm: loki-k8s
@@ -54,9 +51,9 @@ async def test_build_and_deploy(ops_test: OpsTest):
           scale: 1
 
         relations:
-        - [mc:logging-consumer, loki:logging]
-        - [mc:self-metrics-endpoint, prometheus:metrics-endpoint]
-        - [mc:grafana-dashboards-provider, grafana:grafana-dashboard]
+        - [mc:logging, loki:logging]
+        - [mc:metrics-endpoint, prometheus:metrics-endpoint]
+        - [mc:grafana-dashboard, grafana:grafana-dashboard]
         """
     )
