Skip to content

Commit

Permalink
Allow splitting entity path expressions with whitespace
Browse files Browse the repository at this point in the history
  • Loading branch information
jleibs committed Oct 16, 2024
1 parent f4efb80 commit edbeb8f
Showing 1 changed file with 91 additions and 2 deletions.
93 changes: 91 additions & 2 deletions crates/store/re_log_types/src/path/entity_path_filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,67 @@ impl TryFrom<&str> for EntityPathFilter {
}
}

/// Split a string into whitespace-separated tokens with extra logic
///
/// Additional rules:
/// - Escaped whitespace doesn't result in a split
/// - Otherwise always split on `\n`.
/// - And split on whitespace that is not following a `+` or `-` character.
fn split_whitespace_smart(path: &'_ str) -> Vec<&'_ str> {
#![allow(clippy::unwrap_used)]

// We parse on bytes, and take care to only split on either side of a one-byte ASCII,
// making the `from_utf8(…)`s below safe to unwrap.
let mut bytes = path.as_bytes();

let mut tokens = vec![];

while !bytes.is_empty() {
let mut i = 0;
let mut is_in_escape = false;
let mut is_include_exclude = false;

// Find the next unescaped whitespace character not following a '+' or '-' character
while i < bytes.len() {
let is_unescaped_whitespace = !is_in_escape && bytes[i].is_ascii_whitespace();

let is_unescaped_newline = !is_in_escape && bytes[i] == b'\n';

if is_unescaped_newline || (!is_include_exclude && is_unescaped_whitespace) {
break;
}

is_in_escape = bytes[i] == b'\\';

if bytes[i] == b'+' || bytes[i] == b'-' {
is_include_exclude = true;
} else if !is_unescaped_whitespace {
is_include_exclude = false;
}

i += 1;
}
tokens.push(&bytes[..i]);

// Continue skipping whitespace characters
while i < bytes.len() {
if is_in_escape || !bytes[i].is_ascii_whitespace() {
break;
}
is_in_escape = bytes[i] == b'\\';
i += 1;
}

bytes = &bytes[i..];
}

// Safety: we split at proper character boundaries
tokens
.iter()
.map(|token| std::str::from_utf8(token).unwrap())
.collect()
}

impl EntityPathFilter {
/// Parse an entity path filter from a string while ignore syntax errors.
///
Expand All @@ -187,7 +248,8 @@ impl EntityPathFilter {
///
/// Conflicting rules are resolved by the last rule.
pub fn parse_forgiving(rules: &str, subst_env: &EntityPathSubs) -> Self {
Self::from_query_expressions_forgiving(rules.split('\n'), subst_env)
let split_rules = split_whitespace_smart(rules);
Self::from_query_expressions_forgiving(split_rules, subst_env)
}

/// Build a filter from a list of query expressions.
Expand Down Expand Up @@ -243,7 +305,9 @@ impl EntityPathFilter {
rules: &str,
subst_env: &EntityPathSubs,
) -> Result<Self, EntityPathFilterParseError> {
Self::from_query_expressions_strict(rules.split('\n'), subst_env)
let split_rules = split_whitespace_smart(rules);

Self::from_query_expressions_strict(split_rules, subst_env)
}

/// Build a filter from a list of query expressions.
Expand Down Expand Up @@ -891,3 +955,28 @@ mod tests {
}
}
}

#[test]
fn test_split_whitespace_smart() {
assert_eq!(split_whitespace_smart("/world"), vec!["/world"]);
assert_eq!(split_whitespace_smart("a b c"), vec!["a", "b", "c"]);
assert_eq!(split_whitespace_smart("a\nb\tc "), vec!["a", "b", "c"]);
assert_eq!(split_whitespace_smart(r"a\ b c"), vec![r"a\ b", "c"]);

assert_eq!(
split_whitespace_smart("+ a - b + c"),
vec!["+ a", "- b", "+ c"]
);
assert_eq!(
split_whitespace_smart("+ a -\n b + c"),
vec!["+ a", "-", "b", "+ c"]
);
assert_eq!(
split_whitespace_smart(r"+world/** -/world/points"),
vec!["+world/**", "-/world/points"]
);
assert_eq!(
split_whitespace_smart(r"+ world/** - /world/points"),
vec!["+ world/**", "- /world/points"]
);
}

0 comments on commit edbeb8f

Please sign in to comment.