diff --git a/docs/json_schemas/tempo_cluster/v1/provider.json b/docs/json_schemas/tempo_cluster/v1/provider.json new file mode 100644 index 00000000..212ae4d7 --- /dev/null +++ b/docs/json_schemas/tempo_cluster/v1/provider.json @@ -0,0 +1,184 @@ +{ + "$defs": { + "BaseModel": { + "properties": {}, + "title": "BaseModel", + "type": "object" + }, + "TempoClusterProviderAppData": { + "description": "TempoClusterProviderAppData.", + "properties": { + "worker_config": { + "contentMediaType": "application/json", + "contentSchema": { + "type": "string" + }, + "description": "The tempo configuration that the requirer should run with.Yaml-encoded. Must conform to the schema that the presently deployed workload version supports; for example see: https://grafana.com/docs/tempo/latest/configuration/#configure-tempo.", + "title": "Worker Config", + "type": "string" + }, + "loki_endpoints": { + "anyOf": [ + { + "contentMediaType": "application/json", + "contentSchema": { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "List of loki-push-api endpoints to which the worker node can push any logs it generates.", + "title": "Loki Endpoints" + }, + "ca_cert": { + "anyOf": [ + { + "contentMediaType": "application/json", + "contentSchema": { + "type": "string" + }, + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "CA certificate for tls encryption.", + "title": "Ca Cert" + }, + "server_cert": { + "anyOf": [ + { + "contentMediaType": "application/json", + "contentSchema": { + "type": "string" + }, + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Server certificate for tls encryption.", + "title": "Server Cert" + }, + "privkey_secret_id": { + "anyOf": [ + { + "contentMediaType": "application/json", + "contentSchema": { + "type": "string" + }, + "type": "string" + }, + { + "type": "null" + } 
+ ], + "default": null, + "description": "ID of a Juju secret that holds the private key used by the coordinator for TLS encryption.", + "title": "Privkey Secret Id" + }, + "remote_write_endpoints": { + "anyOf": [ + { + "contentMediaType": "application/json", + "contentSchema": { + "items": { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + "type": "array" + }, + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Endpoints to which the workload (and the worker charm) can push metrics to.", + "title": "Remote Write Endpoints" + }, + "charm_tracing_receivers": { + "anyOf": [ + { + "contentMediaType": "application/json", + "contentSchema": { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Endpoints to which the worker node can push its charm traces to.It is a mapping from protocol names such as `zipkin`, `otlp_grpc`, `otlp_http`.", + "title": "Charm Tracing Receivers" + }, + "workload_tracing_receivers": { + "anyOf": [ + { + "contentMediaType": "application/json", + "contentSchema": { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Endpoints to which the worker node can push its workload traces to.It is a mapping from protocol names such as `zipkin`, `otlp_grpc`, `otlp_http`.", + "title": "Workload Tracing Receivers" + } + }, + "required": [ + "worker_config" + ], + "title": "TempoClusterProviderAppData", + "type": "object" + } + }, + "description": "The schema for the provider side of this interface.", + "properties": { + "unit": { + "anyOf": [ + { + "$ref": "#/$defs/BaseModel" + }, + { + "type": "null" + } + ], + "default": null + }, + "app": { + "$ref": "#/$defs/TempoClusterProviderAppData" + } + }, + "required": [ + "app" + ], + "title": "ProviderSchema", + "type": 
"object" +} \ No newline at end of file diff --git a/docs/json_schemas/tempo_cluster/v1/requirer.json b/docs/json_schemas/tempo_cluster/v1/requirer.json new file mode 100644 index 00000000..857fed5d --- /dev/null +++ b/docs/json_schemas/tempo_cluster/v1/requirer.json @@ -0,0 +1,116 @@ +{ + "$defs": { + "TempoClusterRequirerAppData": { + "description": "TempoClusterRequirerAppData.", + "properties": { + "role": { + "contentMediaType": "application/json", + "contentSchema": { + "$ref": "#/$defs/TempoRole" + }, + "title": "Role", + "type": "string" + } + }, + "required": [ + "role" + ], + "title": "TempoClusterRequirerAppData", + "type": "object" + }, + "TempoClusterRequirerUnitData": { + "description": "TempoClusterRequirerUnitData.", + "properties": { + "juju_topology": { + "contentMediaType": "application/json", + "contentSchema": { + "$ref": "#/$defs/_Topology" + }, + "title": "Juju Topology", + "type": "string" + }, + "address": { + "contentMediaType": "application/json", + "contentSchema": { + "type": "string" + }, + "title": "Address", + "type": "string" + } + }, + "required": [ + "juju_topology", + "address" + ], + "title": "TempoClusterRequirerUnitData", + "type": "object" + }, + "TempoRole": { + "description": "Tempo component role names.\n\nReferences:\n arch:\n -> https://grafana.com/docs/tempo/latest/operations/architecture/\n config:\n -> https://grafana.com/docs/tempo/latest/configuration/#server", + "enum": [ + "all", + "querier", + "query-frontend", + "ingester", + "distributor", + "compactor", + "metrics-generator" + ], + "title": "TempoRole", + "type": "string" + }, + "_Topology": { + "description": "JujuTopology as defined by cos-lib.", + "properties": { + "application": { + "title": "Application", + "type": "string" + }, + "charm_name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Charm Name" + }, + "unit": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Unit" + } + }, + 
"required": [ + "application", + "charm_name", + "unit" + ], + "title": "_Topology", + "type": "object" + } + }, + "description": "The schema for the requirer side of this interface.", + "properties": { + "unit": { + "$ref": "#/$defs/TempoClusterRequirerUnitData" + }, + "app": { + "$ref": "#/$defs/TempoClusterRequirerAppData" + } + }, + "required": [ + "unit", + "app" + ], + "title": "RequirerSchema", + "type": "object" +} \ No newline at end of file diff --git a/interfaces/tempo_cluster/v1/README.md b/interfaces/tempo_cluster/v1/README.md new file mode 100644 index 00000000..a3c52953 --- /dev/null +++ b/interfaces/tempo_cluster/v1/README.md @@ -0,0 +1,54 @@ +# `tempo_cluster` + +## Usage + +`tempo_cluster` is an interface meant to exchange cluster configuration in distributed Tempo deployments. +Multiple [Tempo worker](https://github.com/canonical/tempo-worker-k8s-operator) applications can relate to a [Tempo coordinator](https://github.com/canonical/tempo-coordinator-k8s-operator) application over the `tempo_cluster` interface, and send their role and topology, in order to join the cluster. +The coordinator will use the same relation to convey to the workers back the configuration that they must run with. + +## Direction + +This interface implements a provider/requirer pattern. The coordinator charm is the provider of the relation, the worker charm is the requirer. Information flows back and forth: first the requirer shares some data necessary for the coordinator to know the role of the worker, then the provider replies back with the configuration it should run with. + +```mermaid +flowchart TD + Requirer -- Role, JujuTopology --> Provider + Provider -- Config --> Requirer +``` + +## Behavior + +### Provider +The provider is expected to... +- update the gossip rings in all configurations with the addresses of all worker units that are joining the cluster (regardless of their role). 
+- share the exact same configuration to all nodes, regardless of the role they declare, via application databag. + +### Requirer +The requirer application is expected to... +- publish its role as soon as possible via application databag. +Each requirer unit is expected to... +- publish its address and JujuTopology as soon as possible, via unit databag. + +## Relation Data + +[\[Pydantic Schema\]](./schema.py) + +#### Example +```yaml +provider: + app: + worker_config: + # YAML-encoded Tempo configuration + unit: {} + +requirer: + app: + role: querier + unit: + juju_topology: + model: "mymodel" + model_uuid: "1231234120941234" + application: "tempo-querier" + charm_name: "tempo-worker-k8s" + unit: "tempo-querier/2" +``` diff --git a/interfaces/tempo_cluster/v1/interface.yaml b/interfaces/tempo_cluster/v1/interface.yaml new file mode 100644 index 00000000..71670dbd --- /dev/null +++ b/interfaces/tempo_cluster/v1/interface.yaml @@ -0,0 +1,18 @@ +name: tempo_cluster +internal: true +version: 1 +status: published + +providers: + - name: tempo-coordinator-k8s + url: https://github.com/canonical/tempo-coordinator-k8s-operator + test_setup: + location: tests/interface/conftest.py + identifier: cluster_tester + +requirers: + - name: tempo-worker-k8s + url: https://github.com/canonical/tempo-worker-k8s-operator + +maintainer: observability + + diff --git a/interfaces/tempo_cluster/v1/interface_tests/test_provider.py b/interfaces/tempo_cluster/v1/interface_tests/test_provider.py new file mode 100644 index 00000000..752e68bf --- /dev/null +++ b/interfaces/tempo_cluster/v1/interface_tests/test_provider.py @@ -0,0 +1,95 @@ +# Copyright 2024 Canonical +# See LICENSE file for licensing details. 
+import json + +from interface_tester.interface_test import Tester +from scenario import Relation, State + + +def test_validation_fails_with_missing_role(): # remote app databag is empty (no "role"); expects empty relation data after relation-created + tester = Tester( + state_in=State( + relations=[ + Relation( + endpoint="tempo_cluster", + interface="tempo_cluster", + remote_app_name="worker", + remote_app_data={}, + remote_units_data={ + 0: { + "juju_topology": json.dumps( + { + "application": "worker", + "unit": "worker/0", + "charm_name": "worker", + } + ), + "address": json.dumps("192.0.2.1"), + } + }, + ) + ] + ) + ) + tester.run("tempo-cluster-relation-created") + tester.assert_relation_data_empty() + + +def test_validation_succeeds_on_joining_with_role(): # role "all" plus unit topology and address; expects schema-valid data after relation-joined + tester = Tester( + state_in=State( + relations=[ + Relation( + endpoint="tempo_cluster", + interface="tempo_cluster", + remote_app_name="worker", + remote_app_data={ + "role": json.dumps("all"), + }, + remote_units_data={ + 0: { + "juju_topology": json.dumps( + { + "application": "worker", + "unit": "worker/0", + "charm_name": "worker", + } + ), + "address": json.dumps("192.0.2.1"), + } + }, + ), + ] + ) + ) + tester.run("tempo-cluster-relation-joined") + tester.assert_schema_valid() + + +def test_validation_fails_on_joining_with_invalid_role(): # role "imposter" is not one of the TempoRole values; expects empty relation data after relation-joined + tester = Tester( + state_in=State( + relations=[ + Relation( + endpoint="tempo_cluster", + interface="tempo_cluster", + remote_app_name="worker", + remote_app_data={"role": json.dumps("imposter")}, + remote_units_data={ + 0: { + "juju_topology": json.dumps( + { + "application": "worker", + "unit": "worker/0", + "charm_name": "worker", + } + ), + "address": json.dumps("192.0.2.1"), + } + }, + ) + ] + ) + ) + tester.run("tempo-cluster-relation-joined") + tester.assert_relation_data_empty() diff --git a/interfaces/tempo_cluster/v1/interface_tests/test_requirer.py b/interfaces/tempo_cluster/v1/interface_tests/test_requirer.py new file mode 100644 index 00000000..dbceb9c5 --- /dev/null +++ b/interfaces/tempo_cluster/v1/interface_tests/test_requirer.py @@ 
-0,0 +1,38 @@ +# Copyright 2024 Canonical +# See LICENSE file for licensing details. + +import json + +from interface_tester.interface_test import Tester +from scenario import Relation, State + + +def test_data_on_created(): + """Schema validation should pass when the coordinator's app databag is populated.""" + tester = Tester( + state_in=State( + relations=[ + Relation( + endpoint="tempo_cluster", + interface="tempo_cluster", + remote_app_name="coordinator", + # Everything the coordinator publishes lives in its app databag, JSON-encoded + # to match the Json[...] fields of TempoClusterProviderAppData. + remote_app_data={ + "worker_config": json.dumps("foo: bar"), + "charm_tracing_receivers": json.dumps( + {"otlp_http": "http://192.0.2.1:4318"} + ), + "workload_tracing_receivers": json.dumps( + { + "otlp_http": "http://192.0.2.2:4318", + "otlp_grpc": "192.0.2.2:4317", + } + ), + }, + ) + ] + ) + ) + tester.run("tempo-cluster-relation-created") + tester.assert_schema_valid() diff --git a/interfaces/tempo_cluster/v1/schema.py b/interfaces/tempo_cluster/v1/schema.py new file mode 100644 index 00000000..c823f4d3 --- /dev/null +++ b/interfaces/tempo_cluster/v1/schema.py @@ -0,0 +1,93 @@ +"""This file defines the schemas for the provider and requirer sides of this relation interface. + +It must expose two interfaces.schema_base.DataBagSchema subclasses called: +- ProviderSchema +- RequirerSchema +""" +from enum import Enum +from typing import Optional, Dict, List + +from interface_tester.schema_base import DataBagSchema +from pydantic import BaseModel, Field, Json + + +class TempoClusterProviderAppData(BaseModel): + """TempoClusterProviderAppData.""" + worker_config: Json[str] = Field( + description="The tempo configuration that the requirer should run with. " + "Yaml-encoded. Must conform to the schema that the presently deployed " + "workload version supports; for example see: " + "https://grafana.com/docs/tempo/latest/configuration/#configure-tempo." 
+ ) + loki_endpoints: Optional[Json[Dict[str, str]]] = Field( + default=None, + description="Mapping of loki-push-api endpoints to which the worker node can push any logs it generates.") + ca_cert: Optional[Json[str]] = Field(default=None, description="CA certificate for tls encryption.") + server_cert: Optional[Json[str]] = Field(default=None, description="Server certificate for tls encryption.") + privkey_secret_id: Optional[Json[str]] = Field( + default=None, + description="ID of a Juju secret that holds the private key used by the coordinator for TLS encryption." + ) + remote_write_endpoints: Optional[Json[List[Dict[str, str]]]] = Field( + default=None, + description="Endpoints to which the workload (and the worker charm) can push metrics to." + ) + charm_tracing_receivers: Optional[Json[Dict[str, str]]] = Field( + default=None, + description="Endpoints to which the worker node can push its charm traces to. " + "It is a mapping from protocol names such as `zipkin`, `otlp_grpc`, `otlp_http` to the respective endpoint." + ) + workload_tracing_receivers: Optional[Json[Dict[str, str]]] = Field( + default=None, + description="Endpoints to which the worker node can push its workload traces to. " + "It is a mapping from protocol names such as `zipkin`, `otlp_grpc`, `otlp_http` to the respective endpoint." + ) + + +class _Topology(BaseModel): + """JujuTopology as defined by cos-lib.""" + application: str + charm_name: Optional[str] + unit: Optional[str] + + +class TempoClusterRequirerUnitData(BaseModel): + """TempoClusterRequirerUnitData.""" + + juju_topology: Json[_Topology] + address: Json[str] + + +class TempoRole(str, Enum): + """Tempo component role names. + + References: + arch: + -> https://grafana.com/docs/tempo/latest/operations/architecture/ + config: + -> https://grafana.com/docs/tempo/latest/configuration/#server + """ + ALL = "all" # default, meta-role. gets remapped to scalable-single-binary by the worker. 
+ QUERIER = "querier" + QUERY_FRONTEND = "query-frontend" + INGESTER = "ingester" + DISTRIBUTOR = "distributor" + COMPACTOR = "compactor" + METRICS_GENERATOR = "metrics-generator" + + +class TempoClusterRequirerAppData(BaseModel): + """TempoClusterRequirerAppData.""" + + role: Json[TempoRole] + + +class ProviderSchema(DataBagSchema): + """The schema for the provider side of this interface.""" + app: TempoClusterProviderAppData + + +class RequirerSchema(DataBagSchema): + """The schema for the requirer side of this interface.""" + app: TempoClusterRequirerAppData + unit: TempoClusterRequirerUnitData diff --git a/utils/interface-validator.py b/utils/interface-validator.py index ac260bc8..b1485756 100644 --- a/utils/interface-validator.py +++ b/utils/interface-validator.py @@ -67,7 +67,7 @@ def _validate_against_path(self, file, model): if model.name != result.group(1): raise MatchError(f"name '{model.name}' does not match folder structure '{result.group(1)}'") if model.version != int(result.group(2)): - raise MatchError("version ({result.group(2)}) does not match folder structure") + raise MatchError(f"version ({result.group(2)}) does not match folder structure") """Runs the validation against all interface definitions.""" def run(self):