Skip to content

Commit

Permalink
outbound: Add a request_duration histogram for route backends
Browse files Browse the repository at this point in the history
The outbound proxy reports a counter,
outbound_http_route_backend_requests_total, that illustrates how requests are
dispatched over a logical service's backends.

This change augments these metrics with "request duration" histograms. This
terminology is consistent with that of the Prometheus Go client library.

    # HELP outbound_http_route_backend_request_duration_seconds The durations between sending an HTTP request and receiving response headers.
    # TYPE outbound_http_route_backend_request_duration_seconds histogram
    # UNIT outbound_http_route_backend_request_duration_seconds seconds
    outbound_http_route_backend_request_duration_seconds_sum{status_code="200",parent_group="core",parent_kind="Service",parent_namespace="emojivoto",parent_name="emoji-svc",parent_port="8080",parent_section_name="",route_group="",route_kind="default",route_namespace="",route_name="http",backend_group="core",backend_kind="Service",backend_namespace="emojivoto",backend_name="emoji-svc",backend_port="8080",backend_section_name=""} 0.07080217000000001
    outbound_http_route_backend_request_duration_seconds_count{status_code="200",parent_group="core",parent_kind="Service",parent_namespace="emojivoto",parent_name="emoji-svc",parent_port="8080",parent_section_name="",route_group="",route_kind="default",route_namespace="",route_name="http",backend_group="core",backend_kind="Service",backend_namespace="emojivoto",backend_name="emoji-svc",backend_port="8080",backend_section_name=""} 54
    outbound_http_route_backend_request_duration_seconds_bucket{le="0.025",status_code="200",parent_group="core",parent_kind="Service",parent_namespace="emojivoto",parent_name="emoji-svc",parent_port="8080",parent_section_name="",route_group="",route_kind="default",route_namespace="",route_name="http",backend_group="core",backend_kind="Service",backend_namespace="emojivoto",backend_name="emoji-svc",backend_port="8080",backend_section_name=""} 54
    outbound_http_route_backend_request_duration_seconds_bucket{le="0.05",status_code="200",parent_group="core",parent_kind="Service",parent_namespace="emojivoto",parent_name="emoji-svc",parent_port="8080",parent_section_name="",route_group="",route_kind="default",route_namespace="",route_name="http",backend_group="core",backend_kind="Service",backend_namespace="emojivoto",backend_name="emoji-svc",backend_port="8080",backend_section_name=""} 54
    outbound_http_route_backend_request_duration_seconds_bucket{le="0.1",status_code="200",parent_group="core",parent_kind="Service",parent_namespace="emojivoto",parent_name="emoji-svc",parent_port="8080",parent_section_name="",route_group="",route_kind="default",route_namespace="",route_name="http",backend_group="core",backend_kind="Service",backend_namespace="emojivoto",backend_name="emoji-svc",backend_port="8080",backend_section_name=""} 54
    outbound_http_route_backend_request_duration_seconds_bucket{le="0.25",status_code="200",parent_group="core",parent_kind="Service",parent_namespace="emojivoto",parent_name="emoji-svc",parent_port="8080",parent_section_name="",route_group="",route_kind="default",route_namespace="",route_name="http",backend_group="core",backend_kind="Service",backend_namespace="emojivoto",backend_name="emoji-svc",backend_port="8080",backend_section_name=""} 54
    outbound_http_route_backend_request_duration_seconds_bucket{le="0.5",status_code="200",parent_group="core",parent_kind="Service",parent_namespace="emojivoto",parent_name="emoji-svc",parent_port="8080",parent_section_name="",route_group="",route_kind="default",route_namespace="",route_name="http",backend_group="core",backend_kind="Service",backend_namespace="emojivoto",backend_name="emoji-svc",backend_port="8080",backend_section_name=""} 54
    outbound_http_route_backend_request_duration_seconds_bucket{le="1.0",status_code="200",parent_group="core",parent_kind="Service",parent_namespace="emojivoto",parent_name="emoji-svc",parent_port="8080",parent_section_name="",route_group="",route_kind="default",route_namespace="",route_name="http",backend_group="core",backend_kind="Service",backend_namespace="emojivoto",backend_name="emoji-svc",backend_port="8080",backend_section_name=""} 54
    outbound_http_route_backend_request_duration_seconds_bucket{le="2.5",status_code="200",parent_group="core",parent_kind="Service",parent_namespace="emojivoto",parent_name="emoji-svc",parent_port="8080",parent_section_name="",route_group="",route_kind="default",route_namespace="",route_name="http",backend_group="core",backend_kind="Service",backend_namespace="emojivoto",backend_name="emoji-svc",backend_port="8080",backend_section_name=""} 54
    outbound_http_route_backend_request_duration_seconds_bucket{le="5.0",status_code="200",parent_group="core",parent_kind="Service",parent_namespace="emojivoto",parent_name="emoji-svc",parent_port="8080",parent_section_name="",route_group="",route_kind="default",route_namespace="",route_name="http",backend_group="core",backend_kind="Service",backend_namespace="emojivoto",backend_name="emoji-svc",backend_port="8080",backend_section_name=""} 54
    outbound_http_route_backend_request_duration_seconds_bucket{le="+Inf",status_code="200",parent_group="core",parent_kind="Service",parent_namespace="emojivoto",parent_name="emoji-svc",parent_port="8080",parent_section_name="",route_group="",route_kind="default",route_namespace="",route_name="http",backend_group="core",backend_kind="Service",backend_namespace="emojivoto",backend_name="emoji-svc",backend_port="8080",backend_section_name=""} 54

A constrained histogram is used to balance the tradeoff between accuracy and
cost.

Additionally, a basic counter is added to track errors emitted from backends.
Given the current proxy configuration, these can only indicate load shedding
errors:

    # HELP outbound_http_route_backend_request_errors The total number of errors encountered while waiting for a response.
    # TYPE outbound_http_route_backend_request_errors counter
    outbound_http_route_backend_request_errors_total{parent_group="core",parent_kind="Service",parent_namespace="emojivoto",parent_name="emoji-svc",parent_port="8080",parent_section_name="",route_group="",route_kind="default",route_namespace="",route_name="http",backend_group="core",backend_kind="Service",backend_namespace="emojivoto",backend_name="emoji-svc",backend_port="8080",backend_section_name=""} 0
  • Loading branch information
olix0r committed May 1, 2024
1 parent 39bf567 commit 641fc05
Show file tree
Hide file tree
Showing 10 changed files with 395 additions and 51 deletions.
3 changes: 3 additions & 0 deletions Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -1499,9 +1499,12 @@ dependencies = [
name = "linkerd-http-prom"
version = "0.1.0"
dependencies = [
"futures",
"http",
"linkerd-stack",
"pin-project",
"prometheus-client",
"tokio",
]

[[package]]
Expand Down
6 changes: 3 additions & 3 deletions linkerd/app/outbound/src/http/logical/policy/route.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,13 +76,13 @@ impl RouteMetrics {
}

#[cfg(test)]
pub(crate) fn request_count(
pub(crate) fn backend_metrics(
&self,
p: crate::ParentRef,
r: RouteRef,
b: crate::BackendRef,
) -> backend::RequestCount {
self.backend.request_count(p, r, b)
) -> backend::BackendHttpMetrics {
self.backend.get(p, r, b)
}
}

Expand Down
6 changes: 3 additions & 3 deletions linkerd/app/outbound/src/http/logical/policy/route/backend.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use std::{fmt::Debug, hash::Hash, sync::Arc};

mod metrics;

pub use self::metrics::{RequestCount, RouteBackendMetrics};
pub use self::metrics::{BackendHttpMetrics, RouteBackendMetrics};

#[derive(Debug, PartialEq, Eq, Hash)]
pub(crate) struct Backend<T, F> {
Expand Down Expand Up @@ -69,7 +69,7 @@ where
F: Clone + Send + Sync + 'static,
// Assert that filters can be applied.
Self: filters::Apply,
RouteBackendMetrics: svc::ExtractParam<RequestCount, Self>,
RouteBackendMetrics: svc::ExtractParam<BackendHttpMetrics, Self>,
{
/// Builds a stack that applies per-route-backend policy filters over an
/// inner [`Concrete`] stack.
Expand Down Expand Up @@ -101,7 +101,7 @@ where
)
.push(filters::NewApplyFilters::<Self, _, _>::layer())
.push(http::NewTimeout::layer())
.push(metrics::NewCountRequests::layer_via(metrics.clone()))
.push(metrics::NewBackendHttpMetrics::layer_via(metrics.clone()))
.push(svc::NewMapErr::layer_with(|t: &Self| {
let backend = t.params.concrete.backend_ref.clone();
move |source| {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,40 +3,41 @@ use linkerd_app_core::{
metrics::prom::{self, encoding::*, EncodeLabelSetMut},
svc,
};
use linkerd_http_prom::RequestCountFamilies;
use linkerd_http_prom::HttpMetricsFamiles;

pub use linkerd_http_prom::RequestCount;
pub type BackendHttpMetrics = linkerd_http_prom::HttpMetrics<RouteBackendLabels>;

pub type NewCountRequests<N> = linkerd_http_prom::NewCountRequests<RouteBackendMetrics, N>;
pub type NewBackendHttpMetrics<N> =
linkerd_http_prom::NewHttpMetrics<RouteBackendMetrics, RouteBackendLabels, N>;

#[derive(Clone, Debug, Default)]
pub struct RouteBackendMetrics {
metrics: RequestCountFamilies<RouteBackendLabels>,
metrics: HttpMetricsFamiles<RouteBackendLabels>,
}

#[derive(Clone, Debug, Hash, PartialEq, Eq)]
struct RouteBackendLabels(ParentRef, RouteRef, BackendRef);
pub struct RouteBackendLabels(ParentRef, RouteRef, BackendRef);

// === impl RouteBackendMetrics ===

impl RouteBackendMetrics {
pub fn register(reg: &mut prom::Registry) -> Self {
Self {
metrics: RequestCountFamilies::register(reg),
metrics: HttpMetricsFamiles::register(reg),
}
}

#[cfg(test)]
pub(crate) fn request_count(&self, p: ParentRef, r: RouteRef, b: BackendRef) -> RequestCount {
pub(crate) fn get(&self, p: ParentRef, r: RouteRef, b: BackendRef) -> BackendHttpMetrics {
self.metrics.metrics(&RouteBackendLabels(p, r, b))
}
}

impl<T> svc::ExtractParam<RequestCount, T> for RouteBackendMetrics
impl<T> svc::ExtractParam<BackendHttpMetrics, T> for RouteBackendMetrics
where
T: svc::Param<ParentRef> + svc::Param<RouteRef> + svc::Param<BackendRef>,
{
fn extract_param(&self, t: &T) -> RequestCount {
fn extract_param(&self, t: &T) -> BackendHttpMetrics {
self.metrics
.metrics(&RouteBackendLabels(t.param(), t.param(), t.param()))
}
Expand Down
6 changes: 4 additions & 2 deletions linkerd/app/outbound/src/http/logical/policy/router.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,10 @@ where
>,
route::MatchedRoute<T, M::Summary, F, E>: route::filters::Apply + svc::Param<classify::Request>,
route::MatchedBackend<T, M::Summary, F>: route::filters::Apply,
route::backend::RouteBackendMetrics:
svc::ExtractParam<route::backend::RequestCount, route::MatchedBackend<T, M::Summary, F>>,
route::backend::RouteBackendMetrics: svc::ExtractParam<
route::backend::BackendHttpMetrics,
route::MatchedBackend<T, M::Summary, F>,
>,
{
/// Builds a stack that applies routes to distribute requests over a cached
/// set of inner services.
Expand Down
16 changes: 8 additions & 8 deletions linkerd/app/outbound/src/http/logical/policy/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -130,18 +130,18 @@ async fn header_based_route() {
.layer(inner)
.new_service(Policy::from((routes, ())));

let default_reqs = metrics.request_count(
let default_reqs = metrics.backend_metrics(
parent_ref.clone(),
default_route_ref.clone(),
default_backend_ref.clone(),
);
let special_reqs = metrics.request_count(
let special_reqs = metrics.backend_metrics(
parent_ref.clone(),
special_route_ref.clone(),
special_backend_ref.clone(),
);
assert_eq!(default_reqs.get(), 0);
assert_eq!(special_reqs.get(), 0);
assert_eq!(default_reqs.requests_total().get(), 0);
assert_eq!(special_reqs.requests_total().get(), 0);

default.allow(1);
special.allow(1);
Expand All @@ -155,8 +155,8 @@ async fn header_based_route() {
_ = time::sleep(time::Duration::from_secs(1)) => panic!("timed out"),
reqrsp = default.next_request() => reqrsp.expect("request"),
};
assert_eq!(default_reqs.get(), 1);
assert_eq!(special_reqs.get(), 0);
assert_eq!(default_reqs.requests_total().get(), 1);
assert_eq!(special_reqs.requests_total().get(), 0);

default.allow(1);
special.allow(1);
Expand All @@ -171,8 +171,8 @@ async fn header_based_route() {
_ = time::sleep(time::Duration::from_secs(1)) => panic!("timed out"),
reqrsp = special.next_request() => reqrsp.expect("request"),
};
assert_eq!(default_reqs.get(), 1);
assert_eq!(special_reqs.get(), 1);
assert_eq!(default_reqs.requests_total().get(), 1);
assert_eq!(special_reqs.requests_total().get(), 1);

// Hold the router to prevent inner services from being dropped.
drop(router);
Expand Down
3 changes: 3 additions & 0 deletions linkerd/http/prom/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,10 @@ license = "Apache-2.0"
test-util = []

[dependencies]
futures = { version = "0.3", default-features = false }
http = "0.2"
pin-project = "1"
prometheus-client = "0.22"
tokio = { version = "1", features = ["time"] }

linkerd-stack = { path = "../../stack" }
54 changes: 29 additions & 25 deletions linkerd/http/prom/src/count_reqs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use prometheus_client::{
use std::task::{Context, Poll};

#[derive(Clone, Debug)]
pub struct RequestCountFamilies<L: Clone>(Family<L, Counter>);
pub struct RequestCountFamilies<L>(Family<L, Counter>);

#[derive(Clone, Debug)]
pub struct RequestCount(Counter);
Expand All @@ -24,26 +24,6 @@ pub struct CountRequests<S> {
requests: Counter,
}

impl<L> RequestCountFamilies<L>
where
L: EncodeLabelSet + std::fmt::Debug + std::hash::Hash,
L: Eq + Clone + Send + Sync + 'static,
{
pub fn register(registry: &mut Registry) -> Self {
let requests = Family::default();
registry.register(
"requests",
"The total number of requests dispatched",
requests.clone(),
);
Self(requests)
}

pub fn metrics(&self, labels: &L) -> RequestCount {
RequestCount(self.0.get_or_create(labels).clone())
}
}

// === impl NewCountRequests ===

impl<X: Clone, N> NewCountRequests<X, N> {
Expand All @@ -64,16 +44,16 @@ where
type Service = CountRequests<N::Service>;

fn new_service(&self, target: T) -> Self::Service {
let RequestCount(counter) = self.extract.extract_param(&target);
let rc = self.extract.extract_param(&target);
let inner = self.inner.new_service(target);
CountRequests::new(counter, inner)
CountRequests::new(rc, inner)
}
}

// === impl CountRequests ===

impl<S> CountRequests<S> {
fn new(requests: Counter, inner: S) -> Self {
pub(crate) fn new(RequestCount(requests): RequestCount, inner: S) -> Self {
Self { requests, inner }
}
}
Expand All @@ -97,16 +77,40 @@ where
}
}

// === impl RequestCountFamilies ===

impl<L> Default for RequestCountFamilies<L>
where
L: EncodeLabelSet + std::fmt::Debug + std::hash::Hash,
L: Eq + Clone + Send + Sync + 'static,
L: Eq + Clone,
{
fn default() -> Self {
Self(Family::default())
}
}

impl<L> RequestCountFamilies<L>
where
L: EncodeLabelSet + std::fmt::Debug + std::hash::Hash,
L: Eq + Clone + Send + Sync + 'static,
{
pub fn register(registry: &mut Registry) -> Self {
let requests = Family::default();
registry.register(
"requests",
"The total number of requests dispatched",
requests.clone(),
);
Self(requests)
}

pub fn metrics(&self, labels: &L) -> RequestCount {
RequestCount(self.0.get_or_create(labels).clone())
}
}

// === impl RequestCount ===

impl RequestCount {
pub fn get(&self) -> u64 {
self.0.get()
Expand Down
Loading

0 comments on commit 641fc05

Please sign in to comment.