From d6d8275802d7d99060f225d8890bb2ba777c33e0 Mon Sep 17 00:00:00 2001 From: Jeremy Leibs Date: Thu, 17 Oct 2024 01:46:32 -0400 Subject: [PATCH] Allow splitting entity path expressions with whitespace (#7782) ### What - Resolves: https://github.com/rerun-io/rerun/issues/7740 This is a little tricky since we don't want to split on whitespace that happens to show up between a + or - and the expression. Logic seems correct from testing. ### Checklist * [x] I have read and agree to [Contributor Guide](https://github.com/rerun-io/rerun/blob/main/CONTRIBUTING.md) and the [Code of Conduct](https://github.com/rerun-io/rerun/blob/main/CODE_OF_CONDUCT.md) * [x] I've included a screenshot or gif (if applicable) * [x] I have tested the web demo (if applicable): * Using examples from latest `main` build: [rerun.io/viewer](https://rerun.io/viewer/pr/7782?manifest_url=https://app.rerun.io/version/main/examples_manifest.json) * Using full set of examples from `nightly` build: [rerun.io/viewer](https://rerun.io/viewer/pr/7782?manifest_url=https://app.rerun.io/version/nightly/examples_manifest.json) * [x] The PR title and labels are set such as to maximize their usefulness for the next release's CHANGELOG * [x] If applicable, add a new check to the [release checklist](https://github.com/rerun-io/rerun/blob/main/tests/python/release_checklist)! * [x] If have noted any breaking changes to the log API in `CHANGELOG.md` and the migration guide - [PR Build Summary](https://build.rerun.io/pr/7782) - [Recent benchmark results](https://build.rerun.io/graphs/crates.html) - [Wasm size tracking](https://build.rerun.io/graphs/sizes.html) To run all checks from `main`, comment on the PR with `@rerun-bot full-check`. --------- Co-authored-by: Emil Ernerfeldt --- .github/workflows/labels.yml | 2 +- .../src/path/entity_path_filter.rs | 104 +++++++++++++++++- rerun_py/tests/unit/test_dataframe.py | 19 ++++ 3 files changed, 122 insertions(+), 3 deletions(-) diff --git a/.github/workflows/labels.yml b/.github/workflows/labels.yml index b6f86391c1e2..a0e7ac40f10c 100644 --- a/.github/workflows/labels.yml +++ b/.github/workflows/labels.yml @@ -39,4 +39,4 @@ jobs: with: mode: minimum count: 1 - labels: "πŸ“Š analytics, 🟦 blueprint, πŸͺ³ bug, 🌊 C++ API, CLI, codegen/idl, πŸ§‘β€πŸ’» dev experience, dependencies, πŸ“– documentation, πŸ’¬ discussion, examples, exclude from changelog, πŸͺ΅ Log-API, πŸ“‰ performance, 🐍 Python API, ⛃ re_datastore, πŸ” re_query, πŸ“Ί re_viewer, πŸ”Ί re_renderer, 🚜 refactor, β›΄ release, πŸ¦€ Rust API, πŸ”¨ testing, ui, πŸ•ΈοΈ web" + labels: "πŸ“Š analytics, 🟦 blueprint, πŸͺ³ bug, 🌊 C++ API, CLI, codegen/idl, πŸ§‘β€πŸ’» dev experience, dependencies, πŸ“– documentation, πŸ’¬ discussion, examples, exclude from changelog, πŸͺ΅ Log & send APIs, πŸ“‰ performance, 🐍 Python API, ⛃ re_datastore, πŸ” re_query, πŸ“Ί re_viewer, πŸ”Ί re_renderer, 🚜 refactor, β›΄ release, πŸ¦€ Rust API, πŸ”¨ testing, ui, πŸ•ΈοΈ web" diff --git a/crates/store/re_log_types/src/path/entity_path_filter.rs b/crates/store/re_log_types/src/path/entity_path_filter.rs index 220e70c64165..a59629c1e089 100644 --- a/crates/store/re_log_types/src/path/entity_path_filter.rs +++ b/crates/store/re_log_types/src/path/entity_path_filter.rs @@ -169,6 +169,75 @@ impl TryFrom<&str> for EntityPathFilter { } } +/// Split a string into whitespace-separated tokens with extra logic. +/// +/// Specifically, we allow for whitespace between `+`/`-` and the following token. +/// +/// Additional rules: +/// - Escaped whitespace never results in a split. +/// - Otherwise always split on `\n` (even if it follows a `+` or `-` character). +/// - Only consider `+` and `-` characters as special if they are the first character of a token. +/// - Split on whitespace does not following a relevant `+` or `-` character. +fn split_whitespace_smart(path: &'_ str) -> Vec<&'_ str> { + #![allow(clippy::unwrap_used)] + + // We parse on bytes, and take care to only split on either side of a one-byte ASCII, + // making the `from_utf8(…)`s below safe to unwrap. + let mut bytes = path.as_bytes(); + + let mut tokens = vec![]; + + // Start by ignoring any leading whitespace + while !bytes.is_empty() { + let mut i = 0; + let mut is_in_escape = false; + let mut is_include_exclude = false; + let mut new_token = true; + + // Find the next unescaped whitespace character not following a '+' or '-' character + while i < bytes.len() { + let is_unescaped_whitespace = !is_in_escape && bytes[i].is_ascii_whitespace(); + + let is_unescaped_newline = !is_in_escape && bytes[i] == b'\n'; + + if is_unescaped_newline || (!is_include_exclude && is_unescaped_whitespace) { + break; + } + + is_in_escape = bytes[i] == b'\\'; + + if bytes[i] == b'+' || bytes[i] == b'-' { + is_include_exclude = new_token; + } else if !is_unescaped_whitespace { + is_include_exclude = false; + new_token = false; + } + + i += 1; + } + if i > 0 { + tokens.push(&bytes[..i]); + } + + // Continue skipping whitespace characters + while i < bytes.len() { + if is_in_escape || !bytes[i].is_ascii_whitespace() { + break; + } + is_in_escape = bytes[i] == b'\\'; + i += 1; + } + + bytes = &bytes[i..]; + } + + // Safety: we split at proper character boundaries + tokens + .iter() + .map(|token| std::str::from_utf8(token).unwrap()) + .collect() +} + impl EntityPathFilter { /// Parse an entity path filter from a string while ignore syntax errors. /// @@ -187,7 +256,8 @@ impl EntityPathFilter { /// /// Conflicting rules are resolved by the last rule. pub fn parse_forgiving(rules: &str, subst_env: &EntityPathSubs) -> Self { - Self::from_query_expressions_forgiving(rules.split('\n'), subst_env) + let split_rules = split_whitespace_smart(rules); + Self::from_query_expressions_forgiving(split_rules, subst_env) } /// Build a filter from a list of query expressions. @@ -243,7 +313,8 @@ impl EntityPathFilter { rules: &str, subst_env: &EntityPathSubs, ) -> Result { - Self::from_query_expressions_strict(rules.split('\n'), subst_env) + let split_rules = split_whitespace_smart(rules); + Self::from_query_expressions_strict(split_rules, subst_env) } /// Build a filter from a list of query expressions. @@ -891,3 +962,32 @@ mod tests { } } } + +#[test] +fn test_split_whitespace_smart() { + assert_eq!(split_whitespace_smart("/world"), vec!["/world"]); + assert_eq!(split_whitespace_smart("a b c"), vec!["a", "b", "c"]); + assert_eq!(split_whitespace_smart("a\nb\tc "), vec!["a", "b", "c"]); + assert_eq!(split_whitespace_smart(r"a\ b c"), vec![r"a\ b", "c"]); + + assert_eq!( + split_whitespace_smart(" + a - b + c"), + vec!["+ a", "- b", "+ c"] + ); + assert_eq!( + split_whitespace_smart("+ a -\n b + c"), + vec!["+ a", "-", "b", "+ c"] + ); + assert_eq!( + split_whitespace_smart("/weird/path- +/oth- erpath"), + vec!["/weird/path-", "+/oth-", "erpath"] + ); + assert_eq!( + split_whitespace_smart(r"+world/** -/world/points"), + vec!["+world/**", "-/world/points"] + ); + assert_eq!( + split_whitespace_smart(r"+ world/** - /world/points"), + vec!["+ world/**", "- /world/points"] + ); +} diff --git a/rerun_py/tests/unit/test_dataframe.py b/rerun_py/tests/unit/test_dataframe.py index be1134c96199..c5ccf970ea1d 100644 --- a/rerun_py/tests/unit/test_dataframe.py +++ b/rerun_py/tests/unit/test_dataframe.py @@ -173,6 +173,25 @@ def test_full_view(self) -> None: assert table.num_columns == 1 assert table.num_rows == 1 + def test_content_filters(self) -> None: + filter_expressions = [ + "+/** -/static_text", + """ + +/** + -/static_text + """, + {"/** -/static_text": ["Position3D", "Color"]}, + ] + + for expr in filter_expressions: + view = self.recording.view(index="my_index", contents=expr) + + table = view.select().read_all() + + # my_index, log_time, log_tick, points, colors + assert table.num_columns == 5 + assert table.num_rows == 2 + def test_select_columns(self) -> None: view = self.recording.view(index="my_index", contents="points") index_col = rr.dataframe.IndexColumnSelector("my_index")