Skip to content

Commit

Permalink
feat: whitespace modes in the parser
Browse files Browse the repository at this point in the history
- `rsonpath-syntax` disallows leading and trailing
whitespace by default, but can relax this with parser options;
- main parser used in `rq` now ignores leading and trailing query whitespace

Ref: #166
  • Loading branch information
V0ldek committed Jan 9, 2024
1 parent 487d132 commit a22668e
Show file tree
Hide file tree
Showing 7 changed files with 177 additions and 24 deletions.
28 changes: 14 additions & 14 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 9 additions & 0 deletions crates/rsonpath-syntax/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,8 @@ pub(crate) struct SyntaxError {

#[derive(Debug, PartialEq, Eq, Clone)]
pub(crate) enum SyntaxErrorKind {
DisallowedLeadingWhitespace,
DisallowedTrailingWhitespace,
InvalidUnescapedCharacter,
InvalidEscapeSequence,
UnpairedHighSurrogate,
Expand Down Expand Up @@ -179,6 +181,9 @@ impl SyntaxError {
let (prefix, error, suffix) = self.split_error(input);
// Kind-specific notes and suggestion building.
match self.kind {
SyntaxErrorKind::DisallowedLeadingWhitespace | SyntaxErrorKind::DisallowedTrailingWhitespace => {
suggestion.remove(start_idx, error.len());
}
SyntaxErrorKind::InvalidUnescapedCharacter => {
if error == "\"" {
suggestion.replace(start_idx, 1, r#"\""#);
Expand Down Expand Up @@ -637,6 +642,8 @@ impl SyntaxErrorKind {
#[inline]
fn toplevel_message(&self) -> String {
match self {
Self::DisallowedLeadingWhitespace => "query starting with whitespace".to_string(),
Self::DisallowedTrailingWhitespace => "query ending with whitespace".to_string(),
Self::InvalidUnescapedCharacter => "invalid unescaped control character".to_string(),
Self::InvalidEscapeSequence => "invalid escape sequence".to_string(),
Self::UnpairedHighSurrogate => "invalid unicode escape sequence - unpaired high surrogate".to_string(),
Expand All @@ -661,6 +668,8 @@ impl SyntaxErrorKind {
#[inline]
fn underline_message(&self) -> String {
match self {
Self::DisallowedLeadingWhitespace => "leading whitespace is disallowed".to_string(),
Self::DisallowedTrailingWhitespace => "trailing whitespace is disallowed".to_string(),
Self::InvalidUnescapedCharacter => "this character must be escaped".to_string(),
Self::InvalidEscapeSequence => "not a valid escape sequence".to_string(),
Self::UnpairedHighSurrogate => "this high surrogate is unpaired".to_string(),
Expand Down
118 changes: 112 additions & 6 deletions crates/rsonpath-syntax/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -141,16 +141,89 @@ use std::{

/// JSONPath query parser.
///
/// Carries the parsing options it was configured with; they are handed to the
/// internal parsing routine on every call to `parse`.
#[derive(Debug, Clone, Default)]
pub struct Parser {}
pub struct Parser {
// Settings that control parsing, e.g. whether surrounding whitespace is accepted.
options: ParserOptions,
}

/// Configurable builder for a [`Parser`] instance.
///
/// Accumulates parser options; a [`Parser`] built from it receives those options.
#[derive(Debug, Clone, Default)]
pub struct ParserBuilder {}
pub struct ParserBuilder {
// Options collected so far; copied or moved into the `Parser` when it is created.
options: ParserOptions,
}

/// Internal settings shared by [`ParserBuilder`] and [`Parser`].
#[derive(Debug, Clone)]
struct ParserOptions {
/// When `true`, leading and trailing whitespace around a query is accepted;
/// when `false`, the parser reports it as a syntax error.
relaxed_whitespace: bool,
}

impl ParserBuilder {
    /// Construct a builder that holds the default parser settings.
    #[inline]
    #[must_use]
    pub fn new() -> Self {
        let options = ParserOptions::default();
        Self { options }
    }

    /// Decide whether a query may be surrounded by whitespace.
    /// Disabled (`false`) unless set otherwise.
    ///
    /// The grammar in the [RFC](https://www.ietf.org/archive/id/draft-ietf-jsonpath-base-21.html)
    /// disallows whitespace before and after the query. A [`Parser`] follows that strict
    /// rule by default; pass `true` here to make the built parser accept such whitespace.
    ///
    /// ## Examples
    /// ```
    /// # use rsonpath_syntax::{JsonPathQuery, Parser, ParserBuilder};
    /// let default_parser = ParserBuilder::new().build();
    /// let relaxed_parser = ParserBuilder::new()
    ///     .allow_surrounding_whitespace(true)
    ///     .build();
    ///
    /// let query = "  $.leading_whitespace";
    /// assert!(default_parser.parse(query).is_err());
    /// assert!(relaxed_parser.parse(query).is_ok());
    /// ```
    #[inline]
    pub fn allow_surrounding_whitespace(&mut self, value: bool) -> &mut Self {
        // A single flag governs both the leading and the trailing side.
        self.options.relaxed_whitespace = value;
        self
    }

    /// Produce a [`Parser`] configured with a copy of this builder's current settings.
    ///
    /// The builder is only borrowed, so it can be reused afterwards.
    #[inline]
    #[must_use]
    pub fn build(&self) -> Parser {
        let options = self.options.clone();
        Parser { options }
    }
}

impl ParserOptions {
    /// Whether whitespace before the query is accepted instead of being reported as an error.
    fn is_leading_whitespace_allowed(&self) -> bool {
        // Leading and trailing whitespace are currently governed by the same flag.
        let Self { relaxed_whitespace } = *self;
        relaxed_whitespace
    }

    /// Whether whitespace after the query is accepted instead of being reported as an error.
    fn is_trailing_whitespace_allowed(&self) -> bool {
        let Self { relaxed_whitespace } = *self;
        relaxed_whitespace
    }
}

impl Default for ParserOptions {
    /// Strict defaults: whitespace surrounding the query is not allowed.
    #[inline(always)]
    fn default() -> Self {
        let relaxed_whitespace = false;
        Self { relaxed_whitespace }
    }
}

/// Conversion that consumes a [`ParserBuilder`] and produces a [`Parser`].
impl From<ParserBuilder> for Parser {
#[inline(always)]
fn from(_value: ParserBuilder) -> Self {
Self {}
// The builder is taken by value, so its options are moved into the parser without cloning.
fn from(value: ParserBuilder) -> Self {
Self { options: value.options }
}
}

Expand Down Expand Up @@ -178,8 +251,8 @@ impl Parser {
///
/// Note that leading and trailing whitespace is explicitly disallowed by the spec.
/// This parser's options are forwarded to the parsing routine, which uses them to
/// decide whether such whitespace is reported as an error.
#[inline]
pub fn parse(&mut self, str: &str) -> Result<JsonPathQuery> {
crate::parser::parse_json_path_query(str)
pub fn parse(&self, str: &str) -> Result<JsonPathQuery> {
// Delegate to the internal parser, passing this instance's options along.
crate::parser::parse_json_path_query(str, &self.options)
}
}

Expand Down Expand Up @@ -400,6 +473,39 @@ impl Display for Index {
#[cfg(test)]
mod tests {
use super::*;
use pretty_assertions::assert_eq;

#[test]
fn leading_whitespace_is_disallowed() {
    // A query that begins with a space must be rejected by `parse`,
    // and the rendered error must match the expected report exactly.
    let rendered = parse(" $").expect_err("should fail").to_string();
    let expected = r"error: query starting with whitespace
$
^^ leading whitespace is disallowed
(bytes 0-1)
suggestion: did you mean `$` ?
";
    assert_eq!(rendered, expected);
}

#[test]
fn trailing_whitespace_is_disallowed() {
    // A query that ends with a space must be rejected by `parse`,
    // and the rendered error must match the expected report exactly.
    let rendered = parse("$ ").expect_err("should fail").to_string();
    let expected = r"error: query ending with whitespace
$
^^ trailing whitespace is disallowed
(bytes 1-2)
suggestion: did you mean `$` ?
";
    assert_eq!(rendered, expected);
}

mod name_selector {
use super::*;
Expand Down
26 changes: 24 additions & 2 deletions crates/rsonpath-syntax/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use crate::{
error::{InternalParseError, ParseErrorBuilder, SyntaxError, SyntaxErrorKind},
num::{JsonInt, JsonUInt},
str::{JsonString, JsonStringBuilder},
Index, JsonPathQuery, Result, Segment, Selector, Selectors,
Index, JsonPathQuery, ParserOptions, Result, Segment, Selector, Selectors,
};
use nom::{branch::*, bytes::complete::*, character::complete::*, combinator::*, multi::*, sequence::*, *};
use std::{iter::Peekable, str::FromStr};
Expand Down Expand Up @@ -30,11 +30,19 @@ where
}
}

pub(crate) fn parse_json_path_query(q: &str) -> Result<JsonPathQuery> {
pub(crate) fn parse_json_path_query(q: &str, options: &ParserOptions) -> Result<JsonPathQuery> {
let original_input = q;
let mut parse_error = ParseErrorBuilder::new();
let mut segments = vec![];
let q = skip_whitespace(q);
let leading_whitespace_len = original_input.len() - q.len();
if leading_whitespace_len > 0 && !options.is_leading_whitespace_allowed() {
parse_error.add(SyntaxError::new(
SyntaxErrorKind::DisallowedLeadingWhitespace,
original_input.len(),
leading_whitespace_len,
));
}
let q = match char::<_, nom::error::Error<_>>('$')(q).finish() {
Ok((q, _)) => skip_whitespace(q),
Err(e) => {
Expand Down Expand Up @@ -69,6 +77,20 @@ pub(crate) fn parse_json_path_query(q: &str) -> Result<JsonPathQuery> {
q = skip_whitespace(q);
}

// For strict RFC compliance trailing whitespace has to be disallowed.
// This is hard to organically obtain from the parsing above, so we insert this awkward direct check if needed.
if !options.is_trailing_whitespace_allowed() {
let trimmed = original_input.trim_end_matches(WHITESPACE);
let trailing_whitespace_len = original_input.len() - trimmed.len();
if trailing_whitespace_len > 0 {
parse_error.add(SyntaxError::new(
SyntaxErrorKind::DisallowedTrailingWhitespace,
trailing_whitespace_len,
trailing_whitespace_len,
));
}
}

if parse_error.is_empty() {
Ok(JsonPathQuery { segments })
} else {
Expand Down
8 changes: 6 additions & 2 deletions crates/rsonpath/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use color_eyre::{eyre::Result, Help};
use error::{report_compiler_error, report_parser_error};
use log::*;
use rsonpath_lib::automaton::Automaton;
use rsonpath_syntax::JsonPathQuery;
use rsonpath_syntax::{JsonPathQuery, ParserBuilder};
use runner::Runner;

mod args;
Expand Down Expand Up @@ -57,7 +57,11 @@ fn run_with_args(args: &Args) -> Result<()> {
}

/// Parse `query_string` into a [`JsonPathQuery`].
///
/// A parser error is passed through `report_parser_error` and wrapped with the
/// message "Could not parse JSONPath query.".
fn parse_query(query_string: &str) -> Result<JsonPathQuery> {
rsonpath_syntax::parse(query_string)
// Build a parser that tolerates leading and trailing whitespace around the query.
let mut parser_builder = ParserBuilder::default();
parser_builder.allow_surrounding_whitespace(true);
let parser: rsonpath_syntax::Parser = parser_builder.into();
parser
.parse(query_string)
.map_err(|err| report_parser_error(err).wrap_err("Could not parse JSONPath query."))
}

Expand Down
6 changes: 6 additions & 0 deletions crates/rsonpath/tests/cmd/query_with_leading_ws.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
bin = { name = "rq" }

args = [" $", "--json", "{}"]
stdout = """
{}
"""
6 changes: 6 additions & 0 deletions crates/rsonpath/tests/cmd/query_with_trailing_ws.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
bin = { name = "rq" }

args = ["$ ", "--json", "{}"]
stdout = """
{}
"""

0 comments on commit a22668e

Please sign in to comment.