From a22668ef7beb160be8986ce11ff5202005b5ab73 Mon Sep 17 00:00:00 2001 From: Mateusz Gienieczko Date: Mon, 8 Jan 2024 14:37:39 +0100 Subject: [PATCH] feat: whitespace modes in the parser - `rsonpath-syntax` disallows leading and trailing whitespace by default, but can relax this with parser options; - main parser used in `rq` now ignores leading and trailing query whitespace Ref: #166 --- Cargo.lock | 28 ++--- crates/rsonpath-syntax/src/error.rs | 9 ++ crates/rsonpath-syntax/src/lib.rs | 118 +++++++++++++++++- crates/rsonpath-syntax/src/parser.rs | 26 +++- crates/rsonpath/src/main.rs | 8 +- .../tests/cmd/query_with_leading_ws.toml | 6 + .../tests/cmd/query_with_trailing_ws.toml | 6 + 7 files changed, 177 insertions(+), 24 deletions(-) create mode 100644 crates/rsonpath/tests/cmd/query_with_leading_ws.toml create mode 100644 crates/rsonpath/tests/cmd/query_with_trailing_ws.toml diff --git a/Cargo.lock b/Cargo.lock index 7d8548e0..f047a325 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -175,7 +175,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -296,7 +296,7 @@ checksum = "67e77553c4162a157adbf834ebae5b415acbecbeafc7a74b0e886657506a7611" dependencies = [ "proc-macro2", "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -700,9 +700,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.74" +version = "1.0.76" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2de98502f212cfcea8d0bb305bd0f49d7ebdd75b64ba0a68f937d888f4e0d6db" +checksum = "95fc56cda0b5c3325f5fbbd7ff9fda9e02bb00bb3dac51252d2f1bfa1cb8cc8c" dependencies = [ "unicode-ident", ] @@ -1017,22 +1017,22 @@ checksum = "836fa6a3e1e547f9a2c4040802ec865b5d85f4014efe00555d7090a3dcaa1090" [[package]] name = "serde" -version = "1.0.194" +version = "1.0.195" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b114498256798c94a0689e1a15fec6005dee8ac1f41de56404b67afc2a4b773" +checksum = "63261df402c67811e9ac6def069e4786148c4563f4b50fd4bf30aa370d626b02" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.194" +version = "1.0.195" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3385e45322e8f9931410f01b3031ec534c3947d0e94c18049af4d9f9907d4e0" +checksum = "46fe8f8603d81ba86327b23a2e9cdf49e1255fb94a4c5f297f6ee0547178ea2c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -1130,9 +1130,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.46" +version = "2.0.48" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89456b690ff72fddcecf231caedbe615c59480c93358a93dfae7fc29e3ebbf0e" +checksum = "0f3531638e407dfc0814761abb7c00a5b54992b849452a0646b7f65c9f770f3f" dependencies = [ "proc-macro2", "quote", @@ -1181,7 +1181,7 @@ dependencies = [ "proc-macro-error", "proc-macro2", "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -1193,7 +1193,7 @@ dependencies = [ "proc-macro-error", "proc-macro2", "quote", - "syn 2.0.46", + "syn 2.0.48", "test-case-core", ] @@ -1214,7 +1214,7 @@ checksum = "fa0faa943b50f3db30a20aa7e265dbc66076993efed8463e8de414e5d06d3471" dependencies = [ "proc-macro2", "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] diff --git a/crates/rsonpath-syntax/src/error.rs b/crates/rsonpath-syntax/src/error.rs index f597384a..e80f3cb7 100644 --- a/crates/rsonpath-syntax/src/error.rs +++ b/crates/rsonpath-syntax/src/error.rs @@ -106,6 +106,8 @@ pub(crate) struct SyntaxError { #[derive(Debug, PartialEq, Eq, Clone)] pub(crate) enum SyntaxErrorKind { + DisallowedLeadingWhitespace, + DisallowedTrailingWhitespace, InvalidUnescapedCharacter, InvalidEscapeSequence, UnpairedHighSurrogate, @@ -179,6 +181,9 @@ impl SyntaxError { let (prefix, error, suffix) = self.split_error(input); // Kind-specific notes and suggestion building. match self.kind { + SyntaxErrorKind::DisallowedLeadingWhitespace | SyntaxErrorKind::DisallowedTrailingWhitespace => { + suggestion.remove(start_idx, error.len()); + } SyntaxErrorKind::InvalidUnescapedCharacter => { if error == "\"" { suggestion.replace(start_idx, 1, r#"\""#); @@ -637,6 +642,8 @@ impl SyntaxErrorKind { #[inline] fn toplevel_message(&self) -> String { match self { + Self::DisallowedLeadingWhitespace => "query starting with whitespace".to_string(), + Self::DisallowedTrailingWhitespace => "query ending with whitespace".to_string(), Self::InvalidUnescapedCharacter => "invalid unescaped control character".to_string(), Self::InvalidEscapeSequence => "invalid escape sequence".to_string(), Self::UnpairedHighSurrogate => "invalid unicode escape sequence - unpaired high surrogate".to_string(), @@ -661,6 +668,8 @@ impl SyntaxErrorKind { #[inline] fn underline_message(&self) -> String { match self { + Self::DisallowedLeadingWhitespace => "leading whitespace is disallowed".to_string(), + Self::DisallowedTrailingWhitespace => "trailing whitespace is disallowed".to_string(), Self::InvalidUnescapedCharacter => "this character must be escaped".to_string(), Self::InvalidEscapeSequence => "not a valid escape sequence".to_string(), Self::UnpairedHighSurrogate => "this high surrogate is unpaired".to_string(), diff --git a/crates/rsonpath-syntax/src/lib.rs b/crates/rsonpath-syntax/src/lib.rs index ea27bb3e..fc90babe 100644 --- a/crates/rsonpath-syntax/src/lib.rs +++ b/crates/rsonpath-syntax/src/lib.rs @@ -141,16 +141,89 @@ use std::{ /// JSONPath query parser. #[derive(Debug, Clone, Default)] -pub struct Parser {} +pub struct Parser { + options: ParserOptions, +} /// Configurable builder for a [`Parser`] instance. #[derive(Debug, Clone, Default)] -pub struct ParserBuilder {} +pub struct ParserBuilder { + options: ParserOptions, +} + +#[derive(Debug, Clone)] +struct ParserOptions { + relaxed_whitespace: bool, +} + +impl ParserBuilder { + /// Create a new instance of the builder with the default settings. + #[inline] + #[must_use] + pub fn new() -> Self { + Self { + options: ParserOptions::default(), + } + } + + /// Control whether leading and trailing whitespace is allowed in a query. + /// Defaults to false. + /// + /// The [RFC](https://www.ietf.org/archive/id/draft-ietf-jsonpath-base-21.html) grammar + /// makes leading and trailing whitespace disallowed. The [`Parser`] defaults to this strict handling, + /// but can be relaxed with this setting. + /// + /// ## Examples + /// ``` + /// # use rsonpath_syntax::{JsonPathQuery, Parser, ParserBuilder}; + /// let default_parser = ParserBuilder::new().build(); + /// let relaxed_parser = ParserBuilder::new() + /// .allow_surrounding_whitespace(true) + /// .build(); + /// + /// let query = " $.leading_whitespace"; + /// assert!(default_parser.parse(query).is_err()); + /// assert!(relaxed_parser.parse(query).is_ok()); + /// ``` + #[inline] + pub fn allow_surrounding_whitespace(&mut self, value: bool) -> &mut Self { + self.options.relaxed_whitespace = value; + self + } + + /// Build a new instance of a [`Parser`]. + #[inline] + #[must_use] + pub fn build(&self) -> Parser { + Parser { + options: self.options.clone(), + } + } +} + +impl ParserOptions { + fn is_leading_whitespace_allowed(&self) -> bool { + self.relaxed_whitespace + } + + fn is_trailing_whitespace_allowed(&self) -> bool { + self.relaxed_whitespace + } +} + +impl Default for ParserOptions { + #[inline(always)] + fn default() -> Self { + Self { + relaxed_whitespace: false, + } + } +} impl From for Parser { #[inline(always)] - fn from(_value: ParserBuilder) -> Self { - Self {} + fn from(value: ParserBuilder) -> Self { + Self { options: value.options } } } @@ -178,8 +251,8 @@ impl Parser { /// /// Note that leading and trailing whitespace is explicitly disallowed by the spec. #[inline] - pub fn parse(&mut self, str: &str) -> Result { - crate::parser::parse_json_path_query(str) + pub fn parse(&self, str: &str) -> Result { + crate::parser::parse_json_path_query(str, &self.options) } } @@ -400,6 +473,39 @@ impl Display for Index { #[cfg(test)] mod tests { use super::*; + use pretty_assertions::assert_eq; + + #[test] + fn leading_whitespace_is_disallowed() { + let err = parse(" $").expect_err("should fail"); + let display = format!("{err}"); + let expected = r"error: query starting with whitespace + + $ + ^^ leading whitespace is disallowed + (bytes 0-1) + + +suggestion: did you mean `$` ? +"; + assert_eq!(display, expected); + } + + #[test] + fn trailing_whitespace_is_disallowed() { + let err = parse("$ ").expect_err("should fail"); + let display = format!("{err}"); + let expected = r"error: query ending with whitespace + + $ + ^^ trailing whitespace is disallowed + (bytes 1-2) + + +suggestion: did you mean `$` ? +"; + assert_eq!(display, expected); + } mod name_selector { use super::*; diff --git a/crates/rsonpath-syntax/src/parser.rs b/crates/rsonpath-syntax/src/parser.rs index a545f349..76c54410 100644 --- a/crates/rsonpath-syntax/src/parser.rs +++ b/crates/rsonpath-syntax/src/parser.rs @@ -2,7 +2,7 @@ use crate::{ error::{InternalParseError, ParseErrorBuilder, SyntaxError, SyntaxErrorKind}, num::{JsonInt, JsonUInt}, str::{JsonString, JsonStringBuilder}, - Index, JsonPathQuery, Result, Segment, Selector, Selectors, + Index, JsonPathQuery, ParserOptions, Result, Segment, Selector, Selectors, }; use nom::{branch::*, bytes::complete::*, character::complete::*, combinator::*, multi::*, sequence::*, *}; use std::{iter::Peekable, str::FromStr}; @@ -30,11 +30,19 @@ where } } -pub(crate) fn parse_json_path_query(q: &str) -> Result { +pub(crate) fn parse_json_path_query(q: &str, options: &ParserOptions) -> Result { let original_input = q; let mut parse_error = ParseErrorBuilder::new(); let mut segments = vec![]; let q = skip_whitespace(q); + let leading_whitespace_len = original_input.len() - q.len(); + if leading_whitespace_len > 0 && !options.is_leading_whitespace_allowed() { + parse_error.add(SyntaxError::new( + SyntaxErrorKind::DisallowedLeadingWhitespace, + original_input.len(), + leading_whitespace_len, + )); + } let q = match char::<_, nom::error::Error<_>>('$')(q).finish() { Ok((q, _)) => skip_whitespace(q), Err(e) => { @@ -69,6 +77,20 @@ pub(crate) fn parse_json_path_query(q: &str) -> Result { q = skip_whitespace(q); } + // For strict RFC compliance trailing whitespace has to be disallowed. + // This is hard to organically obtain from the parsing above, so we insert this awkward direct check if needed. + if !options.is_trailing_whitespace_allowed() { + let trimmed = original_input.trim_end_matches(WHITESPACE); + let trailing_whitespace_len = original_input.len() - trimmed.len(); + if trailing_whitespace_len > 0 { + parse_error.add(SyntaxError::new( + SyntaxErrorKind::DisallowedTrailingWhitespace, + trailing_whitespace_len, + trailing_whitespace_len, + )); + } + } + if parse_error.is_empty() { Ok(JsonPathQuery { segments }) } else { diff --git a/crates/rsonpath/src/main.rs b/crates/rsonpath/src/main.rs index ac9b8c00..4ab23d1d 100644 --- a/crates/rsonpath/src/main.rs +++ b/crates/rsonpath/src/main.rs @@ -4,7 +4,7 @@ use color_eyre::{eyre::Result, Help}; use error::{report_compiler_error, report_parser_error}; use log::*; use rsonpath_lib::automaton::Automaton; -use rsonpath_syntax::JsonPathQuery; +use rsonpath_syntax::{JsonPathQuery, ParserBuilder}; use runner::Runner; mod args; @@ -57,7 +57,11 @@ fn run_with_args(args: &Args) -> Result<()> { } fn parse_query(query_string: &str) -> Result { - rsonpath_syntax::parse(query_string) + let mut parser_builder = ParserBuilder::default(); + parser_builder.allow_surrounding_whitespace(true); + let parser: rsonpath_syntax::Parser = parser_builder.into(); + parser + .parse(query_string) .map_err(|err| report_parser_error(err).wrap_err("Could not parse JSONPath query.")) } diff --git a/crates/rsonpath/tests/cmd/query_with_leading_ws.toml b/crates/rsonpath/tests/cmd/query_with_leading_ws.toml new file mode 100644 index 00000000..06eb4e2d --- /dev/null +++ b/crates/rsonpath/tests/cmd/query_with_leading_ws.toml @@ -0,0 +1,6 @@ +bin = { name = "rq" } + +args = [" $", "--json", "{}"] +stdout = """ +{} +""" diff --git a/crates/rsonpath/tests/cmd/query_with_trailing_ws.toml b/crates/rsonpath/tests/cmd/query_with_trailing_ws.toml new file mode 100644 index 00000000..e46287c4 --- /dev/null +++ b/crates/rsonpath/tests/cmd/query_with_trailing_ws.toml @@ -0,0 +1,6 @@ +bin = { name = "rq" } + +args = ["$ ", "--json", "{}"] +stdout = """ +{} +"""