From 2ece7c35799fb7276fb0614728a89cce1e7ad4fa Mon Sep 17 00:00:00 2001 From: Toby Lawrence Date: Sun, 12 Nov 2023 13:58:13 -0500 Subject: [PATCH] Add support for `Arc` to `metrics::Cow<'a, T>`. (#402) --- metrics-tracing-context/src/lib.rs | 3 +- metrics-util/src/registry/mod.rs | 9 +- metrics/CHANGELOG.md | 9 + metrics/src/common.rs | 13 +- metrics/src/cow.rs | 743 ++++++++++++++++++----------- metrics/src/key.rs | 90 ++-- metrics/tests/macros.rs | 2 +- 7 files changed, 537 insertions(+), 332 deletions(-) diff --git a/metrics-tracing-context/src/lib.rs b/metrics-tracing-context/src/lib.rs index 71d219d4..253a5486 100644 --- a/metrics-tracing-context/src/lib.rs +++ b/metrics-tracing-context/src/lib.rs @@ -107,8 +107,7 @@ pub use label_filter::LabelFilter; use tracing_integration::WithContext; pub use tracing_integration::{Labels, MetricsLayer}; -/// [`TracingContextLayer`] provides an implementation of a [`Layer`][metrics_util::layers::Layer] -/// for [`TracingContext`]. +/// [`TracingContextLayer`] provides an implementation of a [`Layer`] for [`TracingContext`]. pub struct TracingContextLayer { label_filter: F, } diff --git a/metrics-util/src/registry/mod.rs b/metrics-util/src/registry/mod.rs index 548cd460..68a0e5f1 100644 --- a/metrics-util/src/registry/mod.rs +++ b/metrics-util/src/registry/mod.rs @@ -32,17 +32,14 @@ type RegistryHashMap = HashMap>; /// ## Using `Registry` as the basis of an exporter /// /// As a reusable building blocking for building exporter implementations, users should look at -/// [`Key`] and [`AtomicStorage`][crate::registry::AtomicStorage] to use for their key and storage, -/// respectively. +/// [`Key`] and [`AtomicStorage`] to use for their key and storage, respectively. /// /// These two implementations provide behavior that is suitable for most exporters, providing /// seamless integration with the existing key type used by the core /// [`Recorder`][metrics::Recorder] trait, as well as atomic storage for metrics. 
/// -/// In some cases, users may prefer -/// [`GenerationalAtomicStorage`][crate::registry::GenerationalAtomicStorage] when know if a metric -/// has been touched, even if its value has not changed since the last time it was observed, is -/// necessary. +/// In some cases, users may prefer [`GenerationalAtomicStorage`] when knowing if a metric has been +/// touched, even if its value has not changed since the last time it was observed, is necessary. /// /// ## Performance /// diff --git a/metrics/CHANGELOG.md b/metrics/CHANGELOG.md index d07576f7..211c4fda 100644 --- a/metrics/CHANGELOG.md +++ b/metrics/CHANGELOG.md @@ -8,6 +8,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] - ReleaseDate +### Added + +- Support for using `Arc` with `Cow<'a, T>`. + ([#402](https://github.com/metrics-rs/metrics/pull/402)) + + This will primarily allow using `Arc` for metric names and labels, where previously only + `&'static str` or `String` were allowed. There's still work to be done to also support labels in + this regard. + ## [0.21.1] - 2023-07-02 ### Added diff --git a/metrics/src/common.rs b/metrics/src/common.rs index fdf9a948..6cabedc0 100644 --- a/metrics/src/common.rs +++ b/metrics/src/common.rs @@ -6,12 +6,15 @@ use crate::cow::Cow; /// An allocation-optimized string. /// -/// We specify `SharedString` to attempt to get the best of both worlds: flexibility to provide a -/// static or dynamic (owned) string, while retaining the performance benefits of being able to -/// take ownership of owned strings and borrows of completely static strings. +/// `SharedString` uses a custom copy-on-write implementation that is optimized for metric keys, +/// providing ergonomic sharing of single instances, or slices, of strings and labels. 
This +/// copy-on-write implementation is optimized to allow for constant-time construction (using static +/// values), as well as accepting owned values and values shared through [`Arc`](std::sync::Arc). /// -/// `SharedString` can be converted to from either `&'static str` or `String`, with a method, -/// `const_str`, from constructing `SharedString` from `&'static str` in a `const` fashion. +/// End users generally will not need to interact with this type directly, as the top-level macros +/// (`counter!`, etc), as well as the various conversion implementations +/// ([`From`](std::convert::From)), generally allow users to pass whichever variant of a value +/// (static, owned, shared) is best for them. pub type SharedString = Cow<'static, str>; /// Key-specific hashing algorithm. diff --git a/metrics/src/cow.rs b/metrics/src/cow.rs index 0fcb4f96..7087fcb5 100644 --- a/metrics/src/cow.rs +++ b/metrics/src/cow.rs @@ -5,32 +5,178 @@ use std::{ hash::{Hash, Hasher}, marker::PhantomData, mem::ManuallyDrop, + ops::Deref, ptr::{slice_from_raw_parts, NonNull}, + sync::Arc, }; -use crate::label::Label; +#[derive(Clone, Copy)] +enum Kind { + Owned, + Borrowed, + Shared, +} -/// A clone-on-write smart pointer with an optimized memory layout. +/// A clone-on-write smart pointer with an optimized memory layout, based on `beef`. +/// +/// # Strings, strings everywhere +/// +/// In `metrics`, strings are arguably the most common data type used despite the fact that metrics +/// are measuring numerical values. Both the name of a metric, and its labels, are strings: emitting +/// a metric may involve one string, or 10 strings. Many of these strings tend to be used over and +/// over during the life of the process, as well. +/// +/// In order to achieve and maintain a high level of performance, we use a "clone-on-write" smart +/// pointer to handle passing these strings around. 
Doing so allows us to potentially avoid having +/// to allocate entire copies of a string, instead using a lightweight smart pointer that can live +/// on the stack. +/// +/// # Why not `std::borrow::Cow`? +/// +/// The standard library already provides a clone-on-write smart pointer, `std::borrow::Cow`, which +/// works well in many cases. However, `metrics` strives to provide minimal overhead where possible, +/// and so `std::borrow::Cow` falls down in one particular way: it uses an enum representation which +/// consumes an additional word of storage. +/// +/// As an example, let's look at strings. A string in `std::borrow::Cow` implies that `T` is `str`, +/// and the owned version of `str` is simply `String`. Thus, for `std::borrow::Cow`, the in-memory +/// layout looks like this: +/// +/// ```text +/// Padding +/// | +/// v +/// +--------------+-------------+--------------+--------------+ +/// stdlib Cow::Borrowed: | Enum Tag | Pointer | Length | XXXXXXXX | +/// +--------------+-------------+--------------+--------------+ +/// +--------------+-------------+--------------+--------------+ +/// stdlib Cow::Owned: | Enum Tag | Pointer | Length | Capacity | +/// +--------------+-------------+--------------+--------------+ +/// ``` +/// +/// As you can see, you pay a memory size penalty to be able to wrap an owned string. This +/// additional word adds minimal overhead, but we can easily avoid it with some clever logic around +/// the values of the length and capacity fields. +/// +/// There is an existing crate that does just that: `beef`. Instead of using an enum, it is simply a +/// struct that encodes the discriminant values in the length and capacity fields directly. If we're +/// wrapping a borrowed value, we can infer that the "capacity" will always be zero, as we only need +/// to track the capacity when we're wrapping an owned value, in order to be able to recreate the +/// underlying storage when consuming the smart pointer, or dropping it. 
Instead of the above +/// layout, `beef` looks like this: +/// +/// ```text +/// +-------------+--------------+----------------+ +/// `beef` Cow (borrowed): | Pointer | Length (N) | Capacity (0) | +/// +-------------+--------------+----------------+ +/// +-------------+--------------+----------------+ +/// `beef` Cow (owned): | Pointer | Length (N) | Capacity (M) | +/// +-------------+--------------+----------------+ +/// ``` +/// +/// # Why not `beef`? +/// +/// Up until this point, it might not be clear why we didn't just use `beef`. In truth, our design +/// is fundamentally based on `beef`. Crucially, however, `beef` did not/still does not support +/// `const` construction for generic slices. Remember how we mentioned labels? The labels of a +/// metric are `[Label]` under-the-hood, and so without a way to construct them in a `const` +/// fashion, our previous work to allow entirely static keys would not be possible. +/// +/// Thus, we forked `beef` and copied it directly into `metrics` so that we could write a +/// specialized `const` constructor for `[Label]`. +/// +/// This is why we have our own `Cow` bundled into `metrics` directly, which is based on `beef`. In +/// doing so, we can experiment with more interesting optimizations, and, as mentioned above, we can +/// add const methods to support all of the types involved in statically building metrics keys. +/// +/// # What we do that `beef` doesn't do +/// +/// It was already enough to use our own implementation for the specialized `const` capabilities, +/// but we've taken things even further in a key way: support for `Arc`-wrapped values. +/// +/// ## `Arc`-wrapped values +/// +/// For many strings, there is still a desire to share them cheaply even when they are constructed +/// at run-time. Remember, cloning a `Cow` of an owned value means cloning the value itself, so we +/// need another level of indirection to allow the cheap sharing, which is where `Arc` can +/// provide value. 
+/// +/// Users can construct an `Arc<T>`, where `T` is lined up with the `T` of `metrics::Cow`, and use +/// that as the initial value instead. When `Cow` is cloned, we end up cloning the underlying +/// `Arc` instead, avoiding a new allocation. `Arc` still handles all of the normal logic +/// necessary to know when the wrapped value must be dropped, and how many live references to the +/// value there are, and so on. +/// +/// We handle this by relying on an invariant of `Vec`: it never allocates more than `isize::MAX` +/// [1]. This lets us derive the following truth table of the valid combinations of length/capacity: +/// +/// ```text +/// Length (N) Capacity (M) +/// +---------------+----------------+ +/// Borrowed (&T): | N | 0 | +/// +---------------+----------------+ +/// Owned (T::ToOwned): | N | M < usize::MAX | +/// +---------------+----------------+ +/// Shared (Arc): | N | usize::MAX | +/// +---------------+----------------+ +/// ``` +/// +/// As we only implement `Cow` for types where their owned variants are either explicitly or +/// implicitly backed by `Vec<_>`, we know that our capacity will never be `usize::MAX`, as it is +/// limited to `isize::MAX`, and thus we can safely encode our "shared" state within the capacity +/// field. +/// +/// # Notes +/// +/// [1] - technically, `Vec` can have a capacity greater than `isize::MAX` when storing +/// zero-sized types, but we don't do that here, so we always enforce that an owned version's +/// capacity cannot be `usize::MAX` when constructing `Cow`. pub struct Cow<'a, T: Cowable + ?Sized + 'a> { - /// Pointer to data. ptr: NonNull, - - /// Pointer metadata: length and capacity. - meta: Metadata, - - /// Lifetime marker. 
- marker: PhantomData<&'a T>, + metadata: Metadata, + _lifetime: PhantomData<&'a T>, } impl Cow<'_, T> where T: Cowable + ?Sized, { - #[inline] - pub fn owned(val: T::Owned) -> Self { - let (ptr, meta) = T::owned_into_parts(val); + fn from_parts(ptr: NonNull, metadata: Metadata) -> Self { + Self { ptr, metadata, _lifetime: PhantomData } + } + + /// Creates a pointer to an owned value, consuming it. + pub fn from_owned(owned: T::Owned) -> Self { + let (ptr, metadata) = T::owned_into_parts(owned); - Cow { ptr, meta, marker: PhantomData } + // This check is partially to guard against the semantics of `Vec` changing in the + // future, and partially to ensure that we don't somehow implement `Cowable` for a type + // where its owned version is backed by a vector of ZSTs, where the capacity could + // _legitimately_ be `usize::MAX`. + if metadata.capacity() == usize::MAX { + panic!("Invalid capacity of `usize::MAX` for owned value."); + } + + Self::from_parts(ptr, metadata) + } + + /// Creates a pointer to a shared value. + pub fn from_shared(arc: Arc) -> Self { + let (ptr, metadata) = T::shared_into_parts(arc); + Self::from_parts(ptr, metadata) + } + + /// Extracts the owned data. + /// + /// Clones the data if it is not already owned. + pub fn into_owned(self) -> ::Owned { + // We need to ensure that our own `Drop` impl does _not_ run because we're simply + // transferring ownership of the value back to the caller. For borrowed values, this is + // naturally a no-op because there's nothing to drop, but for owned values, like `String` or + // `Arc`, we wouldn't want to double drop. + let cow = ManuallyDrop::new(self); + + T::owned_from_parts(cow.ptr, &cow.metadata) } } @@ -38,71 +184,69 @@ impl<'a, T> Cow<'a, T> where T: Cowable + ?Sized, { - #[inline] - pub fn borrowed(val: &'a T) -> Self { - let (ptr, meta) = T::ref_into_parts(val); + /// Creates a pointer to a borrowed value. 
+ pub fn from_borrowed(borrowed: &'a T) -> Self { + let (ptr, metadata) = T::borrowed_into_parts(borrowed); - Cow { ptr, meta, marker: PhantomData } + Self::from_parts(ptr, metadata) } +} - #[inline] - pub fn into_owned(self) -> T::Owned { - let cow = ManuallyDrop::new(self); - - if cow.is_borrowed() { - unsafe { T::clone_from_parts(cow.ptr, &cow.meta) } - } else { - unsafe { T::owned_from_parts(cow.ptr, &cow.meta) } - } - } +impl<'a, T> Cow<'a, [T]> +where + T: Clone, +{ + pub const fn const_slice(val: &'a [T]) -> Cow<'a, [T]> { + // SAFETY: We can never create a null pointer by casting a reference to a pointer. + let ptr = unsafe { NonNull::new_unchecked(val.as_ptr() as *mut _) }; + let metadata = Metadata::borrowed(val.len()); - #[inline] - pub fn is_borrowed(&self) -> bool { - self.meta.capacity() == 0 + Self { ptr, metadata, _lifetime: PhantomData } } +} - #[inline] - pub fn is_owned(&self) -> bool { - self.meta.capacity() != 0 - } +impl<'a> Cow<'a, str> { + pub const fn const_str(val: &'a str) -> Self { + // SAFETY: We can never create a null pointer by casting a reference to a pointer. + let ptr = unsafe { NonNull::new_unchecked(val.as_ptr() as *mut _) }; + let metadata = Metadata::borrowed(val.len()); - #[inline] - fn borrow(&self) -> &T { - unsafe { &*T::ref_from_parts(self.ptr, &self.meta) } + Self { ptr, metadata, _lifetime: PhantomData } } } -// Implementations of constant functions for creating `Cow` via static strings, static string -// slices, and static label slices. -impl<'a> Cow<'a, str> { - pub const fn const_str(val: &'a str) -> Self { - Cow { - // We are casting *const T to *mut T, however for all borrowed values - // this raw pointer is only ever dereferenced back to &T. 
- ptr: unsafe { NonNull::new_unchecked(val.as_ptr() as *mut u8) }, - meta: Metadata::from_ref(val.len()), - marker: PhantomData, - } +impl Deref for Cow<'_, T> +where + T: Cowable + ?Sized, +{ + type Target = T; + + fn deref(&self) -> &Self::Target { + let borrowed_ptr = T::borrowed_from_parts(self.ptr, &self.metadata); + + // SAFETY: We only ever hold a pointer to a borrowed value of at least the lifetime of + // `Self`, or an owned value which we have ownership of (albeit indirectly when using + // `Arc`), so our pointer is always valid and live for dereferencing. + unsafe { borrowed_ptr.as_ref().unwrap() } } } -impl<'a> Cow<'a, [Cow<'static, str>]> { - pub const fn const_slice(val: &'a [Cow<'static, str>]) -> Self { - Cow { - ptr: unsafe { NonNull::new_unchecked(val.as_ptr() as *mut Cow<'static, str>) }, - meta: Metadata::from_ref(val.len()), - marker: PhantomData, - } +impl Clone for Cow<'_, T> +where + T: Cowable + ?Sized, +{ + fn clone(&self) -> Self { + let (ptr, metadata) = T::clone_from_parts(self.ptr, &self.metadata); + Self { ptr, metadata, _lifetime: PhantomData } } } -impl<'a> Cow<'a, [Label]> { - pub const fn const_slice(val: &'a [Label]) -> Self { - Cow { - ptr: unsafe { NonNull::new_unchecked(val.as_ptr() as *mut Label) }, - meta: Metadata::from_ref(val.len()), - marker: PhantomData, - } +impl Drop for Cow<'_, T> +where + T: Cowable + ?Sized, +{ + fn drop(&mut self) { + T::drop_from_parts(self.ptr, &self.metadata); } } @@ -112,7 +256,7 @@ where { #[inline] fn hash(&self, state: &mut H) { - self.borrow().hash(state) + self.deref().hash(state) } } @@ -123,7 +267,7 @@ where { #[inline] fn default() -> Self { - Cow::borrowed(Default::default()) + Cow::from_borrowed(Default::default()) } } @@ -136,7 +280,7 @@ where { #[inline] fn partial_cmp(&self, other: &Cow<'_, B>) -> Option { - PartialOrd::partial_cmp(self.borrow(), other.borrow()) + PartialOrd::partial_cmp(self.deref(), other.deref()) } } @@ -146,7 +290,7 @@ where { #[inline] fn cmp(&self, other: 
&Self) -> Ordering { - Ord::cmp(self.borrow(), other.borrow()) + Ord::cmp(self.deref(), other.deref()) } } @@ -156,77 +300,44 @@ where { #[inline] fn from(val: &'a T) -> Self { - Cow::borrowed(val) - } -} - -impl From> for Cow<'_, str> { - #[inline] - fn from(s: std::borrow::Cow<'static, str>) -> Self { - match s { - std::borrow::Cow::Borrowed(bs) => Cow::borrowed(bs), - std::borrow::Cow::Owned(os) => Cow::owned(os), - } - } -} - -impl From for Cow<'_, str> { - #[inline] - fn from(s: String) -> Self { - Cow::owned(s) - } -} - -impl From> for Cow<'_, [Label]> { - #[inline] - fn from(v: Vec