diff --git a/commons/zenoh-keyexpr/src/key_expr/borrowed.rs b/commons/zenoh-keyexpr/src/key_expr/borrowed.rs index ca3a4c7bbc..4f8762e843 100644 --- a/commons/zenoh-keyexpr/src/key_expr/borrowed.rs +++ b/commons/zenoh-keyexpr/src/key_expr/borrowed.rs @@ -11,13 +11,12 @@ // Contributors: // ZettaScale Zenoh Team, // + use super::{canon::Canonizable, OwnedKeyExpr, FORBIDDEN_CHARS}; -// use crate::core::WireExpr; use alloc::{ borrow::{Borrow, ToOwned}, format, string::String, - vec, vec::Vec, }; use core::{ @@ -44,7 +43,7 @@ use zenoh_result::{bail, Error as ZError, ZResult}; /// * Two sets A and B are equal if all A includes B and B includes A. The Key Expression language is designed so that string equality is equivalent to set equality. #[allow(non_camel_case_types)] #[repr(transparent)] -#[derive(PartialEq, Eq, Hash)] +#[derive(PartialEq, Eq, Hash, PartialOrd, Ord)] pub struct keyexpr(str); impl keyexpr { @@ -129,6 +128,11 @@ impl keyexpr { self.0.contains(super::SINGLE_WILD as char) } + pub(crate) const fn is_double_wild(&self) -> bool { + let bytes = self.0.as_bytes(); + bytes.len() == 2 && bytes[0] == b'*' + } + /// Returns the longest prefix of `self` that doesn't contain any wildcard character (`**` or `$*`). /// /// NOTE: this operation can typically be used in a backend implementation, at creation of a Storage to get the keys prefix, @@ -265,8 +269,8 @@ impl keyexpr { result } - pub fn as_str(&self) -> &str { - self + pub const fn as_str(&self) -> &str { + &self.0 } /// # Safety @@ -274,7 +278,7 @@ impl keyexpr { /// /// Much like [`core::str::from_utf8_unchecked`], this is memory-safe, but calling this without maintaining /// [`keyexpr`]'s invariants yourself may lead to unexpected behaviors, the Zenoh network dropping your messages. 
- pub unsafe fn from_str_unchecked(s: &str) -> &Self { + pub const unsafe fn from_str_unchecked(s: &str) -> &Self { core::mem::transmute(s) } @@ -286,11 +290,247 @@ impl keyexpr { pub unsafe fn from_slice_unchecked(s: &[u8]) -> &Self { core::mem::transmute(s) } - pub fn chunks(&self) -> impl Iterator + DoubleEndedIterator { - self.split('/').map(|c| unsafe { - // Any chunk of a valid KE is itself a valid KE => we can safely call the unchecked constructor. - Self::from_str_unchecked(c) - }) + pub const fn chunks(&self) -> Chunks { + Chunks { + inner: self.as_str(), + } + } + pub(crate) fn next_delimiter(&self, i: usize) -> Option { + self.as_str() + .get(i + 1..) + .and_then(|s| s.find('/').map(|j| i + 1 + j)) + } + pub(crate) fn previous_delimiter(&self, i: usize) -> Option { + self.as_str().get(..i).and_then(|s| s.rfind('/')) + } + pub(crate) fn first_byte(&self) -> u8 { + unsafe { *self.as_bytes().get_unchecked(0) } + } + pub(crate) fn iter_splits_ltr(&self) -> SplitsLeftToRight { + SplitsLeftToRight { + inner: self, + index: 0, + } + } + pub(crate) fn iter_splits_rtl(&self) -> SplitsRightToLeft { + SplitsRightToLeft { + inner: self, + index: self.len(), + } + } +} +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub(crate) struct SplitsLeftToRight<'a> { + inner: &'a keyexpr, + index: usize, +} +impl<'a> SplitsLeftToRight<'a> { + fn right(&self) -> &'a str { + &self.inner[self.index + ((self.index != 0) as usize)..] 
+ } + fn left(&self, followed_by_double: bool) -> &'a str { + &self.inner[..(self.index + ((self.index != 0) as usize + 2) * followed_by_double as usize)] + } +} +impl<'a> Iterator for SplitsLeftToRight<'a> { + type Item = (&'a keyexpr, &'a keyexpr); + fn next(&mut self) -> Option { + match self.index < self.inner.len() { + false => None, + true => { + let right = self.right(); + let double_wild = right.starts_with("**"); + let left = self.left(double_wild); + self.index = if left.is_empty() { + self.inner.next_delimiter(0).unwrap_or(self.inner.len()) + } else { + self.inner + .next_delimiter(left.len()) + .unwrap_or(self.inner.len() + (left.len() == self.inner.len()) as usize) + }; + if left.is_empty() { + self.next() + } else { + (!right.is_empty()).then(|| unsafe { + ( + keyexpr::from_str_unchecked(left), + keyexpr::from_str_unchecked(right), + ) + }) + } + } + } + } +} +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub(crate) struct SplitsRightToLeft<'a> { + inner: &'a keyexpr, + index: usize, +} +impl<'a> SplitsRightToLeft<'a> { + fn right(&self, followed_by_double: bool) -> &'a str { + &self.inner[(self.index + - ((self.index != self.inner.len()) as usize + 2) * followed_by_double as usize)..] 
+ } + fn left(&self) -> &'a str { + &self.inner[..(self.index - ((self.index != self.inner.len()) as usize))] + } +} +impl<'a> Iterator for SplitsRightToLeft<'a> { + type Item = (&'a keyexpr, &'a keyexpr); + fn next(&mut self) -> Option { + match self.index { + 0 => None, + _ => { + let left = self.left(); + let double_wild = left.ends_with("**"); + let right = self.right(double_wild); + self.index = if right.is_empty() { + self.inner + .previous_delimiter(self.inner.len()) + .map_or(0, |n| n + 1) + } else { + self.inner + .previous_delimiter( + self.inner.len() + - right.len() + - (self.inner.len() != right.len()) as usize, + ) + .map_or(0, |n| n + 1) + }; + if right.is_empty() { + self.next() + } else { + (!left.is_empty()).then(|| unsafe { + ( + keyexpr::from_str_unchecked(left), + keyexpr::from_str_unchecked(right), + ) + }) + } + } + } + } +} +#[test] +fn splits() { + let ke = keyexpr::new("a/**/b/c").unwrap(); + let mut splits = ke.iter_splits_ltr(); + assert_eq!( + splits.next(), + Some(( + keyexpr::new("a/**").unwrap(), + keyexpr::new("**/b/c").unwrap() + )) + ); + assert_eq!( + splits.next(), + Some((keyexpr::new("a/**/b").unwrap(), keyexpr::new("c").unwrap())) + ); + assert_eq!(splits.next(), None); + let mut splits = ke.iter_splits_rtl(); + assert_eq!( + splits.next(), + Some((keyexpr::new("a/**/b").unwrap(), keyexpr::new("c").unwrap())) + ); + assert_eq!( + splits.next(), + Some(( + keyexpr::new("a/**").unwrap(), + keyexpr::new("**/b/c").unwrap() + )) + ); + assert_eq!(splits.next(), None); + let ke = keyexpr::new("**").unwrap(); + let mut splits = ke.iter_splits_ltr(); + assert_eq!( + splits.next(), + Some((keyexpr::new("**").unwrap(), keyexpr::new("**").unwrap())) + ); + assert_eq!(splits.next(), None); + let ke = keyexpr::new("ab").unwrap(); + let mut splits = ke.iter_splits_ltr(); + assert_eq!(splits.next(), None); + let ke = keyexpr::new("ab/cd").unwrap(); + let mut splits = ke.iter_splits_ltr(); + assert_eq!( + splits.next(), + 
Some((keyexpr::new("ab").unwrap(), keyexpr::new("cd").unwrap())) + ); + assert_eq!(splits.next(), None); + for (i, ke) in crate::fuzzer::KeyExprFuzzer(rand::thread_rng()) + .take(100) + .enumerate() + { + dbg!(i, &ke); + let splits = ke.iter_splits_ltr().collect::>(); + assert_eq!(splits, { + let mut rtl_rev = ke.iter_splits_rtl().collect::>(); + rtl_rev.reverse(); + rtl_rev + }); + assert!(!splits + .iter() + .any(|s| s.0.ends_with('/') || s.1.starts_with('/'))); + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Chunks<'a> { + inner: &'a str, +} +impl<'a> Chunks<'a> { + /// Convert the remaining part of the iterator to a keyexpr if it is not empty. + pub const fn as_keyexpr(self) -> Option<&'a keyexpr> { + match self.inner.is_empty() { + true => None, + _ => Some(unsafe { keyexpr::from_str_unchecked(self.inner) }), + } + } + /// Peek at the next chunk without consuming it. + pub fn peek(&self) -> Option<&keyexpr> { + if self.inner.is_empty() { + None + } else { + Some(unsafe { + keyexpr::from_str_unchecked( + &self.inner[..self.inner.find('/').unwrap_or(self.inner.len())], + ) + }) + } + } + /// Peek at the last chunk without consuming it. 
+ pub fn peek_back(&self) -> Option<&keyexpr> { + if self.inner.is_empty() { + None + } else { + Some(unsafe { + keyexpr::from_str_unchecked( + &self.inner[self.inner.rfind('/').map_or(0, |i| i + 1)..], + ) + }) + } + } +} +impl<'a> Iterator for Chunks<'a> { + type Item = &'a keyexpr; + fn next(&mut self) -> Option { + if self.inner.is_empty() { + return None; + } + let (next, inner) = self.inner.split_once('/').unwrap_or((self.inner, "")); + self.inner = inner; + Some(unsafe { keyexpr::from_str_unchecked(next) }) + } +} +impl<'a> DoubleEndedIterator for Chunks<'a> { + fn next_back(&mut self) -> Option { + if self.inner.is_empty() { + return None; + } + let (inner, next) = self.inner.rsplit_once('/').unwrap_or(("", self.inner)); + self.inner = inner; + Some(unsafe { keyexpr::from_str_unchecked(next) }) } } diff --git a/commons/zenoh-keyexpr/src/key_expr/format/mod.rs b/commons/zenoh-keyexpr/src/key_expr/format/mod.rs index dbdf0e6446..78b9f240af 100644 --- a/commons/zenoh-keyexpr/src/key_expr/format/mod.rs +++ b/commons/zenoh-keyexpr/src/key_expr/format/mod.rs @@ -12,10 +12,45 @@ // ZettaScale Zenoh Team, // +//! # Building and parsing Key Expressions +//! A common issue in REST API is the association of meaning to sections of the URL, and respecting that API in a convenient manner. +//! The same issue arises naturally when designing a KE space, and [`KeFormat`] was designed to help you with this, +//! both in constructing and in parsing KEs that fit the formats you've defined. +//! +//! [`kedefine`](https://docs.rs/zenoh/0.10.1-rc/zenoh/macro.kedefine.html) also allows you to define formats at compile time, allowing a more performant, but more importantly safer and more convenient use of said formats, +//! as the [`keformat`](https://docs.rs/zenoh/0.10.1-rc/zenoh/macro.keformat.html) and [`kewrite`](https://docs.rs/zenoh/0.10.1-rc/zenoh/macro.kewrite.html) macros will be able to tell you if you're attempting to set fields of the format that do not exist. +//! +//! 
## The format syntax +//! KE formats are defined following a syntax that extends the [`keyexpr`] syntax. In addition to existing chunk types, KE formats support "specification" chunks. +//! These chunks must follow one of the following syntaxes: `${id:pattern}`, `${id:pattern#default}`, `$#{id:pattern}#`, or `$#{id:pattern#default}#`, where: +//! - `id` is the chunk identifier: it cannot contain the `:` character, and is used to name the chunk in accessors. +//! - `pattern` must be a valid KE (and therefore cannot contain `#`) and defines the range of values that the chunk may adopt. +//! - `default` (optional) is used as the chunk value when formatting if the builder wasn't supplied with a value for `id`. +//! +//! ## Formatting +//! To use a format to build a Key Expression, its [formatter](KeFormat::formatter) must be constructed. +//! +//! A formatter functions as an `id`-value map which can be [`KeFormatter::build`] into a [`OwnedKeyExpr`] once all specs have a value. +//! +//! The formatter will notably prevent you from setting values for a spec that isn't included by its pattern. +//! +//! ## Parsing +//! [`KeFormat`] can also be used to parse any [`keyexpr`] that intersects with it, using [`KeFormat::parse`]. +//! +//! The parser will then assign subsections of the [`keyexpr`] to each spec, and the resulting [`Parsed`] result can then be queried +//! for each spec's assigned value. +//! +//! Specs are considered greedy and evaluated left-to-right: if your format would allow ambiguous parsings, chunks will be consumed +//! by the leftmost specs first. For example `${a:**}/-/${b:**}` parsing `hey/-/-/there` would assign `hey/-` to `a` and `there` to `b`, +//! (even though you might have expected `a` to only consume `hey` and `b` to consume the remaining `-/there`). +//! +//! A good way to avoid ambiguities when working with formats that contain multiple `**` specs is to separate such specs using verbatim chunks +//! 
(chunks that start with an `@`), as `**` is incapable of consuming these chunks. + use alloc::{boxed::Box, string::String, vec::Vec}; use core::{ convert::{TryFrom, TryInto}, - fmt::Display, + fmt::{Debug, Display}, num::NonZeroU32, }; @@ -27,25 +62,66 @@ mod support; pub use support::{IKeFormatStorage, Segment}; use support::{IterativeConstructor, Spec}; -/// A utility to define Key Expression (KE) formats. +/// # Building and parsing Key Expressions +/// A common issue in REST API is the association of meaning to sections of the URL, and respecting that API in a convenient manner. +/// The same issue arises naturally when designing a KE space, and [`KeFormat`] was designed to help you with this, +/// both in constructing and in parsing KEs that fit the formats you've defined. +/// +/// [`zenoh::kedefine`](https://docs.rs/zenoh/0.10.1-rc/zenoh/macro.kedefine.html) also allows you to define formats at compile time, allowing a more performant, but more importantly safer and more convenient use of said formats, +/// as the [`zenoh::keformat`](https://docs.rs/zenoh/0.10.1-rc/zenoh/macro.keformat.html) and [`zenoh::kewrite`](https://docs.rs/zenoh/0.10.1-rc/zenoh/macro.kewrite.html) macros will be able to tell you if you're attempting to set fields of the format that do not exist. +/// +/// ## The format syntax +/// KE formats are defined following a syntax that extends the [`keyexpr`] syntax. In addition to existing chunk types, KE formats support "specification" chunks. +/// These chunks must follow one of the following syntaxes: `${id:pattern}`, `${id:pattern#default}`, `$#{id:pattern}#`, or `$#{id:pattern#default}#`, where: +/// - `id` is the chunk identifier: it cannot contain the `:` character, and is used to name the chunk in accessors. +/// - `pattern` must be a valid KE (and therefore cannot contain `#`) and defines the range of values that the chunk may adopt. 
+/// - `default` (optional) is used as the chunk value when formatting if the builder wasn't supplied with a value for `id`. +/// +/// ## Formatting +/// To use a format to build a Key Expression, its [formatter](KeFormat::formatter) must be constructed. /// -/// Formats are written like KEs, except sections can be substituted for specs using the `${id:pattern#default}` format to define fields. -/// `id` is the name of the field that gets encoded in that section, it must be non-empty and will stop at the first encountered `:`. -/// `pattern` is a KE pattern that any value set for that field must match. It stops at the first encountered `#` or end of spec. -/// `default` is optional, and lets you specify a value at construction for the field. +/// A formatter functions as an `id`-value map which can be [`KeFormatter::build`] into a [`OwnedKeyExpr`] once all specs have a value. /// -/// Note that the spec is considered to end at the first encountered `}`; if you need your id, pattern or default to contain `}`, you may use `$#{spec}#. +/// The formatter will notably prevent you from setting values for a spec that isn't included by its pattern. /// -/// Specs may only be preceded and followed by `/`. -#[derive(Debug, Clone, Copy, Hash)] +/// ## Parsing +/// [`KeFormat`] can also be used to parse any [`keyexpr`] that intersects with it, using [`KeFormat::parse`]. +/// +/// The parser will then assign subsections of the [`keyexpr`] to each spec, and the resulting [`Parsed`] result can then be queried +/// for each spec's assigned value. +/// +/// Specs are considered greedy and evaluated left-to-right: if your format would allow ambiguous parsings, chunks will be consumed +/// by the leftmost specs first. For example `${a:**}/-/${b:**}` parsing `hey/-/-/there` would assign `hey/-` to `a` and `there` to `b`, +/// (even though you might have expected `a` to only consume `hey` and `b` to consume the remaining `-/there`). 
+/// +/// A good way to avoid ambiguities when working with formats that contain multiple `**` specs is to separate such specs using verbatim chunks +/// (chunks that start with an `@`), as `**` is incapable of consuming these chunks. +#[derive(Clone, Copy, Hash)] pub struct KeFormat<'s, Storage: IKeFormatStorage<'s> + 's = Vec>> { + /// The [`[Segment]`](Segment)s of the format. storage: Storage, + /// The end of the format. It may be one of 3 cases: + /// - An empty string, in which case the format ends with the last segment. + /// - A keyexpr preceded by `/`. + /// - A keyexpr, in the case the format contains no specs. suffix: &'s str, } +impl<'s, Storage: IKeFormatStorage<'s>> Debug for KeFormat<'s, Storage> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, "{self}") + } +} impl<'s> KeFormat<'s, Vec>> { + /// Construct a new [`KeFormat`], using a vector to store its state-machine and parser results. pub fn new + ?Sized>(value: &'s S) -> ZResult { value.as_ref().try_into() } + /// Construct a new [`KeFormat`], using a stack-allocated array to store its state-machine and parser results. + /// + /// `N` is simply the number of specifications in `value`. If this number of specs isn't known at compile-time, use [`KeFormat::new`] instead. + /// + /// If you know `value` at compile time, using [`zenoh::kedefine`](https://docs.rs/zenoh/0.10.1-rc/zenoh/macro.kedefine.html) instead is advised, + /// as it will provide more features and construct higher performance formats than this constructor. pub fn noalloc_new(value: &'s str) -> ZResult; N]>> { value.try_into() } @@ -138,6 +214,7 @@ pub mod macro_support { } } impl<'s, Storage: IKeFormatStorage<'s> + 's> KeFormat<'s, Storage> { + /// Constructs a new formatter for the format. 
pub fn formatter(&'s self) -> KeFormatter<'s, Storage> { KeFormatter { format: self, @@ -216,10 +293,11 @@ impl<'s, Storage: IKeFormatStorage<'s> + 's> TryFrom<&'s str> for KeFormat<'s, S bail!("Invalid KeFormat: {value} contains duplicated ids") } } - Ok(KeFormat { - storage, - suffix: &value[segment_start..], - }) + let suffix = &value[segment_start..]; + if suffix.contains('*') { + bail!("Invalid KeFormat: wildcards are only allowed in specs when writing formats") + } + Ok(KeFormat { storage, suffix }) } } @@ -258,6 +336,8 @@ impl<'s, Storage: IKeFormatStorage<'s> + 's> core::fmt::Display for KeFormat<'s, write!(f, "{}", self.suffix) } } + +/// An active formatter for a [`KeFormat`] #[derive(Clone)] pub struct KeFormatter<'s, Storage: IKeFormatStorage<'s>> { format: &'s KeFormat<'s, Storage>, diff --git a/commons/zenoh-keyexpr/src/key_expr/format/parsing.rs b/commons/zenoh-keyexpr/src/key_expr/format/parsing.rs index 18509201ec..1c72e1d431 100644 --- a/commons/zenoh-keyexpr/src/key_expr/format/parsing.rs +++ b/commons/zenoh-keyexpr/src/key_expr/format/parsing.rs @@ -14,8 +14,8 @@ use zenoh_result::{bail, ZResult}; -use super::{support::trim_suffix_slash, IKeFormatStorage, KeFormat, Segment}; -use crate::key_expr::{format::support::trim_prefix_slash, keyexpr}; +use super::{IKeFormatStorage, KeFormat, Segment}; +use crate::key_expr::keyexpr; pub struct Parsed<'s, Storage: IKeFormatStorage<'s>> { format: &'s KeFormat<'s, Storage>, @@ -47,7 +47,11 @@ impl<'s, Storage: IKeFormatStorage<'s>> IntoIterator for &'s Parsed<'s, Storage> type Item = ::Item; type IntoIter = Iter<'s, Storage>; fn into_iter(self) -> Self::IntoIter { - todo!() + Iter { + parsed: self, + start: 0, + end: self.format.storage.segments().len(), + } } } pub struct Iter<'s, Storage: IKeFormatStorage<'s>> { @@ -95,147 +99,129 @@ impl<'s, Storage: IKeFormatStorage<'s>> DoubleEndedIterator for Iter<'s, Storage } impl<'s, Storage: IKeFormatStorage<'s> + 's> KeFormat<'s, Storage> { + /// Parses `target` 
according to `self`. The returned `Parsed` object can be used to extract the values of the fields in `self` from `target`. + /// + /// The parser is spec-greedy, fixed-section lazy: it will consume as much of `target` as possible for each spec in the format. + /// + /// For example, `**/${spec:**}/**` will always assign all of `target` to `spec`. + /// + /// # Errors + /// If `target` does not intersect with `self`, an error is returned. pub fn parse(&'s self, target: &'s keyexpr) -> ZResult> { + dbg!(target, self); let segments = self.storage.segments(); + if segments.is_empty() + && !target.intersects(unsafe { keyexpr::from_str_unchecked(self.suffix) }) + { + bail!("{target} does not intersect with {self}") + } let mut results = self.storage.values_storage(|_| None); - let Some(target) = target.strip_suffix(self.suffix) else { - if !segments.is_empty() - && segments.iter().all(|s| s.spec.pattern() == "**") - && self.suffix.as_bytes()[0] == b'/' - && target == &self.suffix[1..] - { - return Ok(Parsed { - format: self, - results, - }); + let results_mut = results.as_mut(); + debug_assert_eq!(segments.len(), results_mut.len()); + let found = 'a: { + match self.suffix.as_bytes() { + [] => do_parse(Some(target), segments, results_mut), + [b'/', suffix @ ..] 
=> { + let suffix = unsafe { keyexpr::from_slice_unchecked(suffix) }; + for (target, candidate) in target.iter_splits_rtl() { + if suffix.intersects(candidate) + && do_parse(Some(target), segments, results_mut) + { + break 'a true; + } + } + suffix.intersects(target) && do_parse(None, segments, results_mut) + } + _ => { + unreachable!(); + } } - bail!("{target} is not included in {self}") }; - assert_eq!(segments.len(), results.as_mut().len()); - if do_parse(target, segments, results.as_mut()) { + if found { Ok(Parsed { format: self, results, }) } else { - bail!("{target} is not included in {self}") + bail!("{target} does not intersect with {self}") } } } -fn do_parse<'s>( - input: &'s str, - segments: &[Segment<'s>], - results: &mut [Option<&'s keyexpr>], +fn do_parse<'a>( + target: Option<&'a keyexpr>, + segments: &[Segment], + results: &mut [Option<&'a keyexpr>], ) -> bool { - debug_assert!(!input.starts_with('/')); - // Parsing is finished if there are no more segments to process AND the input is now empty. - let [segment, segments @ ..] = segments else { - return input.is_empty(); - }; - let [result, results @ ..] 
= results else { - unreachable!() - }; - // reset result to None in case of backtracking - *result = None; - // Inspect the pattern: we want to know how many chunks we need to have a chance of inclusion, as well as if we need to worry about double wilds - let pattern = segment.spec.pattern(); - let mut has_double_wilds = false; - let min_chunks = pattern - .split('/') - .filter(|s| { - if *s == "**" { - has_double_wilds = true; - false - } else { - true + dbg!(target, segments); + match (segments, results) { + ([], []) => target.map_or(true, keyexpr::is_double_wild), + ([segment, segments @ ..], [result, results @ ..]) => { + let prefix = dbg!(segment.prefix()); + let pattern = dbg!(segment.pattern()); + // if target is empty + let Some(target) = target else { + // this segment only matches if the pattern is `**` and the prefix is empty (since it cannot be `**`) + if dbg!(prefix.is_none() && pattern.is_double_wild()) { + *result = None; + // the next segments still have to be checked to respect the same condition + return dbg!(!segments.iter().zip(results).any(|(segment, result)| { + *result = None; + segment.prefix().is_some() || !segment.pattern().is_double_wild() + })); + } else { + return false; + } + }; + macro_rules! 
try_intersect { + ($pattern: expr, $result: expr, $target: expr, $segments: expr, $results: expr) => {{ + let target = $target; + let segments = $segments; + dbg!($pattern, target); + if $pattern.intersects(target) + && do_parse( + target.is_double_wild().then_some(target), + segments, + $results, + ) + { + *$result = Some(dbg!(target)); + return true; + } + for (candidate, target) in target.iter_splits_rtl() { + if $pattern.intersects(dbg!(candidate)) + && do_parse(Some(target), segments, $results) + { + *result = Some(candidate); + return true; + } + } + if $pattern.is_double_wild() && do_parse(Some(target), segments, $results) { + *$result = None; + return true; + } + }}; } - }) - .count(); - // Since input is /-stripped, we need to strip it from the prefix too. - let prefix = trim_prefix_slash(segment.prefix); - // We handle double-wild segments that may branch in a different function, to keep this one tail-recursive - if has_double_wilds { - return do_parse_doublewild( - input, segments, results, result, pattern, prefix, min_chunks, - ); - } - // Strip the prefix (including the end-/ if the prefix is non-empty) - let Some(input) = input.strip_prefix(prefix) else { - return false; - }; - let mut chunks = 0; - for i in (0..input.len()).filter(|i| input.as_bytes()[*i] == b'/') { - chunks += 1; - if chunks < min_chunks { - continue; - } - let r = keyexpr::new(&input[..i]).expect("any subsection of a keyexpr is a keyexpr"); - if pattern.includes(r) { - *result = Some(r); - return do_parse(trim_prefix_slash(&input[(i + 1)..]), segments, results); - } else { - return false; - } - } - chunks += 1; - if chunks < min_chunks { - return false; - } - let r = keyexpr::new(input).expect("any subsection of a keyexpr is a keyexpr"); - if pattern.includes(r) { - *result = Some(r); - do_parse("", segments, results) - } else { - false - } -} -fn do_parse_doublewild<'s>( - input: &'s str, - segments: &[Segment<'s>], - results: &mut [Option<&'s keyexpr>], - result: &mut Option<&'s 
keyexpr>, - pattern: &keyexpr, - prefix: &str, - min_chunks: usize, -) -> bool { - if min_chunks == 0 { - if let Some(input) = input.strip_prefix(trim_suffix_slash(prefix)) { - if do_parse(trim_prefix_slash(input), segments, results) { - return true; + //if the prefix can be compressed to empty, + if prefix.is_none() { + try_intersect!(pattern, result, target, segments, results); } - } else { - return false; - } - } - let Some(input) = input.strip_prefix(prefix) else { - return false; - }; - let input = trim_prefix_slash(input); - let mut chunks = 0; - for i in (0..input.len()).filter(|i| input.as_bytes()[*i] == b'/') { - chunks += 1; - if chunks < min_chunks { - continue; - } - let r = keyexpr::new(&input[..i]).expect("any subsection of a keyexpr is a keyexpr"); - if pattern.includes(r) { - *result = Some(r); - if do_parse(trim_prefix_slash(&input[(i + 1)..]), segments, results) { - return true; + // iterate through as many splits as `prefix` could possibly consume. + for (candidate, target) in target.iter_splits_ltr().take(match prefix { + None => 1, + Some(prefix) => (prefix.bytes().filter(|&c| c == b'/').count() + 1) * 3, + }) { + if prefix.map_or(candidate.is_double_wild(), |prefix| { + dbg!(prefix).intersects(dbg!(candidate)) + }) { + try_intersect!(pattern, result, target, segments, results); + } } + dbg!(pattern.is_double_wild()) + && prefix.map_or(false, |prefix| dbg!(prefix.intersects(target))) + && do_parse(None, segments, results) } - } - chunks += 1; - if chunks < min_chunks { - return false; - } - let r = keyexpr::new(input).expect("any subsection of a keyexpr is a keyexpr"); - if pattern.includes(r) { - *result = Some(r); - do_parse("", segments, results) - } else { - false + _ => unreachable!(), } } @@ -243,7 +229,7 @@ fn do_parse_doublewild<'s>( fn parsing() { use crate::key_expr::OwnedKeyExpr; use core::convert::TryFrom; - for a_spec in ["${a:*}", "a/${a:*}", "a/${a:*/**}"] { + for a_spec in ["${a:*}", "a/${a:*}"] { for b_spec in ["b/${b:**}", 
"${b:**}"] { let specs = [a_spec, b_spec, "c"]; for spec in [2, 3] { @@ -263,4 +249,76 @@ fn parsing() { } } } + KeFormat::new("**/${a:**}/${b:**}/**").unwrap_err(); + let format = KeFormat::new("${a:**}/${b:**}").unwrap(); + assert_eq!( + format + .parse(keyexpr::new("a/b/c").unwrap()) + .unwrap() + .get("a") + .unwrap() + .unwrap() + .as_str(), + "a/b/c" + ); + assert_eq!( + format + .parse(keyexpr::new("**").unwrap()) + .unwrap() + .get("a") + .unwrap() + .unwrap() + .as_str(), + "**" + ); + assert_eq!( + format + .parse(keyexpr::new("**").unwrap()) + .unwrap() + .get("b") + .unwrap() + .unwrap() + .as_str(), + "**" + ); + let format = KeFormat::new("hi/${a:there}/${b:**}").unwrap(); + assert_eq!( + format + .parse(keyexpr::new("hi/**").unwrap()) + .unwrap() + .get("a") + .unwrap() + .unwrap() + .as_str(), + "**" + ); + assert_eq!( + format + .parse(keyexpr::new("hi/**").unwrap()) + .unwrap() + .get("b") + .unwrap() + .unwrap() + .as_str(), + "**" + ); + let format = KeFormat::new("hi/${a:there}/@/${b:**}").unwrap(); + assert_eq!( + format + .parse(keyexpr::new("hi/**/@").unwrap()) + .unwrap() + .get("a") + .unwrap() + .unwrap() + .as_str(), + "**" + ); + assert_eq!( + format + .parse(keyexpr::new("hi/**/@").unwrap()) + .unwrap() + .get("b") + .unwrap(), + None + ); } diff --git a/commons/zenoh-keyexpr/src/key_expr/format/support.rs b/commons/zenoh-keyexpr/src/key_expr/format/support.rs index ed79e88d59..16451797aa 100644 --- a/commons/zenoh-keyexpr/src/key_expr/format/support.rs +++ b/commons/zenoh-keyexpr/src/key_expr/format/support.rs @@ -86,9 +86,33 @@ impl core::fmt::Display for Spec<'_> { } #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub struct Segment<'a> { + /// What precedes a spec in a [`KeFormat`]. + /// It may be: + /// - empty if the spec is the first thing in the format. + /// - `/` if the spec comes right after another spec. + /// - a valid keyexpr followed by `/` if the spec comes after a keyexpr. 
pub(crate) prefix: &'a str, pub(crate) spec: Spec<'a>, } +impl Segment<'_> { + pub fn prefix(&self) -> Option<&keyexpr> { + match self.prefix { + "" | "/" => None, + _ => Some(unsafe { + keyexpr::from_str_unchecked(trim_suffix_slash(trim_prefix_slash(self.prefix))) + }), + } + } + pub fn id(&self) -> &str { + self.spec.id() + } + pub fn pattern(&self) -> &keyexpr { + self.spec.pattern() + } + pub fn default(&self) -> Option<&keyexpr> { + self.spec.default() + } +} pub enum IterativeConstructor { Complete(Complete), @@ -246,6 +270,9 @@ impl<'s> IKeFormatStorage<'s> for Vec> { } } +/// Trim the prefix slash from a target string if it has one. +/// # Safety +/// `target` is assumed to be a valid `keyexpr` except for the leading slash. pub(crate) fn trim_prefix_slash(target: &str) -> &str { &target[matches!(target.as_bytes().first(), Some(b'/')) as usize..] } diff --git a/commons/zenoh-keyexpr/src/keyexpr_tree/arc_tree.rs b/commons/zenoh-keyexpr/src/keyexpr_tree/arc_tree.rs index c2a7ff5375..a0428ac563 100644 --- a/commons/zenoh-keyexpr/src/keyexpr_tree/arc_tree.rs +++ b/commons/zenoh-keyexpr/src/keyexpr_tree/arc_tree.rs @@ -58,6 +58,7 @@ fn ketree_borrow_mut<'a, T, Token: TokenTrait>( /// The tree and its nodes have shared ownership, while their mutability is managed through the `Token`. /// /// Most of its methods are declared in the [`ITokenKeyExprTree`] trait. 
+// tags{ketree.arc} pub struct KeArcTree< Weight, Token: TokenTrait = DefaultToken, @@ -143,6 +144,7 @@ where &'a Arc, Wildness, Children, Token>, Token>>, &'a mut Token, ); + // tags{ketree.arc.node} fn node(&'a self, token: &'a Token, at: &keyexpr) -> Option { let inner = ketree_borrow(&self.inner, token); let mut chunks = at.chunks(); @@ -155,10 +157,12 @@ where } Some((node.as_node(), token)) } + // tags{ketree.arc.node.mut} fn node_mut(&'a self, token: &'a mut Token, at: &keyexpr) -> Option { self.node(unsafe { core::mem::transmute(&*token) }, at) .map(|(node, _)| (node, token)) } + // tags{ketree.arc.node.or_create} fn node_or_create(&'a self, token: &'a mut Token, at: &keyexpr) -> Self::NodeMut { let inner = ketree_borrow_mut(&self.inner, token); if at.is_wild() { @@ -206,6 +210,7 @@ where >, &'a Token, >; + // tags{ketree.arc.tree_iter} fn tree_iter(&'a self, token: &'a Token) -> Self::TreeIter { let inner = ketree_borrow(&self.inner, token); TokenPacker { @@ -227,6 +232,7 @@ where >, &'a mut Token, >; + // tags{ketree.arc.tree_iter.mut} fn tree_iter_mut(&'a self, token: &'a mut Token) -> Self::TreeIterMut { let inner = ketree_borrow(&self.inner, token); TokenPacker { @@ -248,6 +254,7 @@ where >, Self::IntersectionItem, >; + // tags{ketree.arc.intersecting} fn intersecting_nodes(&'a self, token: &'a Token, key: &'a keyexpr) -> Self::Intersection { let inner = ketree_borrow(&self.inner, token); if inner.wildness.get() || key.is_wild() { @@ -272,6 +279,7 @@ where >, Self::IntersectionItemMut, >; + // tags{ketree.arc.intersecting.mut} fn intersecting_nodes_mut( &'a self, token: &'a mut Token, @@ -301,6 +309,7 @@ where >, Self::InclusionItem, >; + // tags{ketree.arc.included} fn included_nodes(&'a self, token: &'a Token, key: &'a keyexpr) -> Self::Inclusion { let inner = ketree_borrow(&self.inner, token); if inner.wildness.get() || key.is_wild() { @@ -325,6 +334,7 @@ where >, Self::InclusionItemMut, >; + // tags{ketree.arc.included.mut} fn 
included_nodes_mut(&'a self, token: &'a mut Token, key: &'a keyexpr) -> Self::InclusionMut { let inner = ketree_borrow(&self.inner, token); if inner.wildness.get() || key.is_wild() { @@ -352,6 +362,7 @@ where >, Self::IncluderItem, >; + // tags{ketree.arc.including} fn nodes_including(&'a self, token: &'a Token, key: &'a keyexpr) -> Self::Includer { let inner = ketree_borrow(&self.inner, token); if inner.wildness.get() || key.is_wild() { @@ -376,6 +387,7 @@ where >, Self::IncluderItemMut, >; + // tags{ketree.arc.including.mut} fn nodes_including_mut(&'a self, token: &'a mut Token, key: &'a keyexpr) -> Self::IncluderMut { let inner = ketree_borrow(&self.inner, token); if inner.wildness.get() || key.is_wild() { @@ -391,6 +403,7 @@ where } type PruneNode = KeArcTreeNode, Wildness, Children, Token>; + // tags{ketree.arc.prune.where} fn prune_where bool>( &self, token: &mut Token, diff --git a/commons/zenoh-keyexpr/src/keyexpr_tree/iters/inclusion.rs b/commons/zenoh-keyexpr/src/keyexpr_tree/iters/inclusion.rs index bd875be1b9..0ed2c96645 100644 --- a/commons/zenoh-keyexpr/src/keyexpr_tree/iters/inclusion.rs +++ b/commons/zenoh-keyexpr/src/keyexpr_tree/iters/inclusion.rs @@ -96,7 +96,7 @@ where }; } let chunk = node.chunk(); - let chunk_is_verbatim = chunk.as_bytes()[0] == b'@'; + let chunk_is_verbatim = chunk.first_byte() == b'@'; for i in *start..*end { let kec_start = self.ke_indices[i]; if kec_start == self.key.len() { @@ -136,7 +136,7 @@ where } None => { let key = unsafe { keyexpr::from_slice_unchecked(key) }; - if unlikely(key == "**") && chunk.as_bytes()[0] != b'@' { + if unlikely(key == "**") && chunk.first_byte() != b'@' { push!(kec_start); node_matches = true; } else if key.includes(chunk) { @@ -259,7 +259,7 @@ where }; } let chunk = node.chunk(); - let chunk_is_verbatim = chunk.as_bytes()[0] == b'@'; + let chunk_is_verbatim = chunk.first_byte() == b'@'; for i in *start..*end { let kec_start = self.ke_indices[i]; if kec_start == self.key.len() { @@ -299,7 
+299,7 @@ where } None => { let key = unsafe { keyexpr::from_slice_unchecked(key) }; - if unlikely(key == "**") && chunk.as_bytes()[0] != b'@' { + if unlikely(key == "**") && chunk.first_byte() != b'@' { push!(kec_start); node_matches = true; } else if key.includes(chunk) { diff --git a/commons/zenoh-keyexpr/src/keyexpr_tree/iters/intersection.rs b/commons/zenoh-keyexpr/src/keyexpr_tree/iters/intersection.rs index e46305adbf..34902810f0 100644 --- a/commons/zenoh-keyexpr/src/keyexpr_tree/iters/intersection.rs +++ b/commons/zenoh-keyexpr/src/keyexpr_tree/iters/intersection.rs @@ -95,7 +95,7 @@ where }; } let chunk = node.chunk(); - let chunk_is_verbatim = chunk.as_bytes()[0] == b'@'; + let chunk_is_verbatim = chunk.first_byte() == b'@'; if unlikely(chunk.as_bytes() == b"**") { // If the current node is `**`, it is guaranteed to match... node_matches = true; @@ -295,7 +295,7 @@ where }; } let chunk = node.chunk(); - let chunk_is_verbatim = chunk.as_bytes()[0] == b'@'; + let chunk_is_verbatim = chunk.first_byte() == b'@'; if unlikely(chunk.as_bytes() == b"**") { // If the current node is `**`, it is guaranteed to match... node_matches = true; diff --git a/commons/zenoh-keyexpr/src/lib.rs b/commons/zenoh-keyexpr/src/lib.rs index a31fcb24a5..f80a9c177c 100644 --- a/commons/zenoh-keyexpr/src/lib.rs +++ b/commons/zenoh-keyexpr/src/lib.rs @@ -12,14 +12,41 @@ // ZettaScale Zenoh Team, // -//! ⚠️ WARNING ⚠️ +//! [Key expressions](https://github.com/eclipse-zenoh/roadmap/blob/main/rfcs/ALL/Key%20Expressions.md) are Zenoh's address space. //! -//! This crate is intended for Zenoh's internal use. +//! In Zenoh, operations are performed on keys. To allow addressing multiple keys with a single operation, we use Key Expressions (KE). +//! KEs are a small language that expresses sets of keys through a glob-like language. //! -//! [Click here for Zenoh's documentation](../zenoh/index.html) +//! 
These semantics can be a bit difficult to implement, so this module provides the following facilities: +//! +//! # Storing Key Expressions +//! This module provides 2 flavours to store strings that have been validated to respect the KE syntax, and a third is provided by [`zenoh`](https://docs.rs/zenoh): +//! - [`keyexpr`] is the equivalent of a [`str`], +//! - [`OwnedKeyExpr`] works like an [`Arc`], +//! - [`KeyExpr`](https://docs.rs/zenoh/latest/zenoh/key_expr/struct.KeyExpr.html) works like a [`Cow`], but also stores some additional context internal to Zenoh to optimize +//! routing and network usage. +//! +//! All of these types [`Deref`](core::ops::Deref) to [`keyexpr`], which notably has methods to check whether a given [`keyexpr::intersects`] with another, +//! or even if a [`keyexpr::includes`] another. +//! +//! # Tying values to Key Expressions +//! When storing values tied to Key Expressions, you might want something more specialized than a [`HashMap`](std::collections::HashMap) if you want to respect +//! the Key Expression semantics with high performance. +//! +//! Enter [KeTrees](keyexpr_tree). These are data-structures specially built to store KE-value pairs in a manner that supports the set-semantics of KEs. +//! +//! # Building and parsing Key Expressions +//! A common issue in REST APIs is the association of meaning to sections of the URL, and respecting that API in a convenient manner. +//! The same issue arises naturally when designing a KE space, and [`KeFormat`](format::KeFormat) was designed to help you with this, +//! both in constructing and in parsing KEs that fit the formats you've defined. +//! +//! [`kedefine`] also allows you to define formats at compile time, allowing a more performant, but more importantly safer and more convenient use of said formats, +//! as the [`keformat`] and [`kewrite`] macros will be able to tell you if you're attempting to set fields of the format that do not exist. 
+ #![cfg_attr(not(feature = "std"), no_std)] extern crate alloc; pub mod key_expr; + pub use key_expr::*; pub mod keyexpr_tree; diff --git a/commons/zenoh-macros/src/lib.rs b/commons/zenoh-macros/src/lib.rs index 2ee5aebeac..81143c0aec 100644 --- a/commons/zenoh-macros/src/lib.rs +++ b/commons/zenoh-macros/src/lib.rs @@ -19,6 +19,7 @@ //! [Click here for Zenoh's documentation](../zenoh/index.html) use proc_macro::TokenStream; use quote::quote; +use syn::LitStr; use zenoh_keyexpr::format::{ macro_support::{self, SegmentBuilder}, KeFormat, @@ -314,3 +315,14 @@ pub fn keformat(tokens: TokenStream) -> TokenStream { }) .into() } + +/// Equivalent to [`keyexpr::new`](zenoh_keyexpr::keyexpr::new), but the check is run at compile-time and will emit a compile error in case of failure. +#[proc_macro] +pub fn ke(tokens: TokenStream) -> TokenStream { + let value: LitStr = syn::parse(tokens).unwrap(); + let ke = value.value(); + match zenoh_keyexpr::keyexpr::new(&ke) { + Ok(_) => quote!(unsafe {::zenoh::key_expr::keyexpr::from_str_unchecked(#ke)}).into(), + Err(e) => panic!("{}", e), + } +} diff --git a/zenoh/src/key_expr.rs b/zenoh/src/key_expr.rs index d2295f9798..2eee6c0665 100644 --- a/zenoh/src/key_expr.rs +++ b/zenoh/src/key_expr.rs @@ -12,7 +12,36 @@ // ZettaScale Zenoh Team, // -//! [Key expression](https://github.com/eclipse-zenoh/roadmap/blob/main/rfcs/ALL/Key%20Expressions.md) types and utils. +//! [Key expressions](https://github.com/eclipse-zenoh/roadmap/blob/main/rfcs/ALL/Key%20Expressions.md) are Zenoh's address space. +//! +//! In Zenoh, operations are performed on keys. To allow addressing multiple keys with a single operation, we use Key Expressions (KE). +//! KEs are a small language that expresses sets of keys through a glob-like language. +//! +//! These semantics can be a bit difficult to implement, so this module provides the following facilities: +//! +//! # Storing Key Expressions +//! 
This module provides 3 flavours to store strings that have been validated to respect the KE syntax: +//! - [`keyexpr`] is the equivalent of a [`str`], +//! - [`OwnedKeyExpr`] works like an [`Arc`], +//! - [`KeyExpr`] works like a [`Cow`], but also stores some additional context internal to Zenoh to optimize +//! routing and network usage. +//! +//! All of these types [`Deref`](core::ops::Deref) to [`keyexpr`], which notably has methods to check whether a given [`keyexpr::intersects`] with another, +//! or even if a [`keyexpr::includes`] another. +//! +//! # Tying values to Key Expressions +//! When storing values tied to Key Expressions, you might want something more specialized than a [`HashMap`](std::collections::HashMap) if you want to respect +//! the Key Expression semantics with high performance. +//! +//! Enter [KeTrees](keyexpr_tree). These are data-structures specially built to store KE-value pairs in a manner that supports the set-semantics of KEs. +//! +//! # Building and parsing Key Expressions +//! A common issue in REST APIs is the association of meaning to sections of the URL, and respecting that API in a convenient manner. +//! The same issue arises naturally when designing a KE space, and [`KeFormat`](format::KeFormat) was designed to help you with this, +//! both in constructing and in parsing KEs that fit the formats you've defined. +//! +//! [`kedefine`] also allows you to define formats at compile time, allowing a more performant, but more importantly safer and more convenient use of said formats, +//! as the [`keformat`] and [`kewrite`] macros will be able to tell you if you're attempting to set fields of the format that do not exist. 
use std::{ convert::{TryFrom, TryInto}, @@ -20,7 +49,8 @@ use std::{ str::FromStr, }; use zenoh_core::{AsyncResolve, Resolvable, SyncResolve}; -pub use zenoh_protocol::core::key_expr::*; +pub use zenoh_keyexpr::*; +pub use zenoh_macros::{kedefine, keformat, kewrite}; use zenoh_protocol::{ core::{key_expr::canon::Canonizable, ExprId, WireExpr}, network::{declare, DeclareBody, Mapping, UndeclareKeyExpr}, diff --git a/zenoh/src/lib.rs b/zenoh/src/lib.rs index 5c3b938e5b..0a8f1feb64 100644 --- a/zenoh/src/lib.rs +++ b/zenoh/src/lib.rs @@ -87,7 +87,7 @@ use prelude::*; use scouting::ScoutBuilder; use std::future::Ready; use zenoh_core::{AsyncResolve, Resolvable, SyncResolve}; -pub use zenoh_macros::{kedefine, keformat, kewrite}; +pub use zenoh_macros::{ke, kedefine, keformat, kewrite}; use zenoh_protocol::core::WhatAmIMatcher; use zenoh_result::{zerror, ZResult}; use zenoh_util::concat_enabled_features;