From e6f30fb15f7ed3e875ff97acb09bb44868f3437f Mon Sep 17 00:00:00 2001 From: Mateusz Gienieczko Date: Thu, 11 Jan 2024 13:26:00 +0100 Subject: [PATCH 1/5] test: added jsonpath cts - Parser is now tested with the official [JSONPath Compliance Test Suite](https://github.com/jsonpath-standard/jsonpath-compliance-test-suite) --- .github/workflows/test-codegen.yml | 2 +- .gitmodules | 3 + Cargo.lock | 19 ++++ crates/rsonpath-benchmarks | 2 +- crates/rsonpath-test-codegen/src/gen.rs | 5 +- crates/rsonpath-test/.gitignore | 2 +- crates/rsonpath-test/Cargo.toml | 4 + crates/rsonpath-test/build.rs | 4 +- .../jsonpath-compliance-test-suite | 1 + crates/rsonpath-test/src/lib.rs | 102 ++++++++++++++++++ crates/rsonpath-test/tests/cts.rs | 72 +++++++++++++ crates/rsonpath-test/tests/tests.rs | 2 + 12 files changed, 211 insertions(+), 7 deletions(-) create mode 160000 crates/rsonpath-test/jsonpath-compliance-test-suite create mode 100644 crates/rsonpath-test/tests/cts.rs create mode 100644 crates/rsonpath-test/tests/tests.rs diff --git a/.github/workflows/test-codegen.yml b/.github/workflows/test-codegen.yml index 9ba96ec5..db8997c9 100644 --- a/.github/workflows/test-codegen.yml +++ b/.github/workflows/test-codegen.yml @@ -71,5 +71,5 @@ jobs: name: rsonpath-test-documents path: | crates/rsonpath-test/documents - crates/rsonpath-test/tests + crates/rsonpath-test/tests/generated retention-days: 1 diff --git a/.gitmodules b/.gitmodules index f41649bc..faf163c6 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ [submodule "crates/rsonpath-benchmarks"] path = crates/rsonpath-benchmarks url = git@github.com:V0ldek/rsonpath-benchmarks.git +[submodule "crates/rsonpath-test/jsonpath-compliance-test-suite"] + path = crates/rsonpath-test/jsonpath-compliance-test-suite + url = https://github.com/jsonpath-standard/jsonpath-compliance-test-suite.git diff --git a/Cargo.lock b/Cargo.lock index 2d356499..b9e9dd7f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -860,6 +860,8 @@ dependencies =
[ "rsonpath-lib", "rsonpath-syntax", "rsonpath-test-codegen", + "serde", + "serde_json", ] [[package]] @@ -926,6 +928,12 @@ dependencies = [ "wait-timeout", ] +[[package]] +name = "ryu" +version = "1.0.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f98d2aa92eebf49b69786be48e4477826b256916e84a57ff2a4f21923b48eb4c" + [[package]] name = "same-file" version = "1.0.6" @@ -961,6 +969,17 @@ dependencies = [ "syn 2.0.48", ] +[[package]] +name = "serde_json" +version = "1.0.111" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "176e46fa42316f18edd598015a5166857fc835ec732f5215eac6b7bdbf0a84f4" +dependencies = [ + "itoa", + "ryu", + "serde", +] + [[package]] name = "serde_spanned" version = "0.6.5" diff --git a/crates/rsonpath-benchmarks b/crates/rsonpath-benchmarks index 31313913..42fdd491 160000 --- a/crates/rsonpath-benchmarks +++ b/crates/rsonpath-benchmarks @@ -1 +1 @@ -Subproject commit 313139137c73eb8722217dffa098a45cca630ed3 +Subproject commit 42fdd4919021daa74782aec852757830ea798614 diff --git a/crates/rsonpath-test-codegen/src/gen.rs b/crates/rsonpath-test-codegen/src/gen.rs index f6f78419..6bacdea1 100644 --- a/crates/rsonpath-test-codegen/src/gen.rs +++ b/crates/rsonpath-test-codegen/src/gen.rs @@ -134,7 +134,7 @@ pub(crate) fn generate_test_fns(files: &mut Files) -> Result<(), io::Error> { #(#tests_mod)* }; - files.add_rust_file("tests.rs", &tests_source); + files.add_rust_file("mod.rs", &tests_source); return Ok(()); @@ -234,7 +234,8 @@ pub(crate) fn generate_test_fns(files: &mut Files) -> Result<(), io::Error> { let mut result = vec![]; #engine_ident.indices(&#input_ident, &mut result)?; - assert_eq!(result, vec![#(#indices,)*], "result != expected"); + let expected: Vec = vec![#(#indices,)*]; + assert_eq!(result, expected, "result != expected"); } } ResultTypeToTest::ApproximateSpans(spans) => { diff --git a/crates/rsonpath-test/.gitignore b/crates/rsonpath-test/.gitignore index 
eac28597..226242be 100644 --- a/crates/rsonpath-test/.gitignore +++ b/crates/rsonpath-test/.gitignore @@ -3,4 +3,4 @@ !/documents/json/large /documents/json/large/compressed /documents/toml/compressed -/tests \ No newline at end of file +/tests/generated \ No newline at end of file diff --git a/crates/rsonpath-test/Cargo.toml b/crates/rsonpath-test/Cargo.toml index 0a8aa5e0..5353496a 100644 --- a/crates/rsonpath-test/Cargo.toml +++ b/crates/rsonpath-test/Cargo.toml @@ -13,6 +13,10 @@ edition = "2021" rust-version = "1.70.0" publish = false +[dependencies] +serde = { version = "1.0.195", features = ["derive"] } +serde_json = "1.0.111" + [dev-dependencies] pretty_assertions = "1.4.0" rsonpath-lib = { version = "0.8.5", path = "../rsonpath-lib" } diff --git a/crates/rsonpath-test/build.rs b/crates/rsonpath-test/build.rs index a5dffb0c..05216682 100644 --- a/crates/rsonpath-test/build.rs +++ b/crates/rsonpath-test/build.rs @@ -3,8 +3,8 @@ use std::{fs, io::ErrorKind, process::Command}; const TOML_DIRECTORY_PATH: &str = "documents/toml"; const JSON_DIRECTORY_PATH: &str = "documents/json"; -const TEST_OUTPUT_PATH: &str = "tests"; -const GEN_RUST_GLOB: &str = "tests/**/*.rs"; +const TEST_OUTPUT_PATH: &str = "tests/generated"; +const GEN_RUST_GLOB: &str = "tests/generated/**/*.rs"; const RUSTFMT_TOML_PATH: &str = "../../rustfmt.toml"; const CONTROL_ENV_VAR: &str = "RSONPATH_ENABLE_TEST_CODEGEN"; diff --git a/crates/rsonpath-test/jsonpath-compliance-test-suite b/crates/rsonpath-test/jsonpath-compliance-test-suite new file mode 160000 index 00000000..446336cd --- /dev/null +++ b/crates/rsonpath-test/jsonpath-compliance-test-suite @@ -0,0 +1 @@ +Subproject commit 446336cd6651586f416a3b546c70bdd0fa2022c0 diff --git a/crates/rsonpath-test/src/lib.rs b/crates/rsonpath-test/src/lib.rs index 8b137891..c6d28e73 100644 --- a/crates/rsonpath-test/src/lib.rs +++ b/crates/rsonpath-test/src/lib.rs @@ -1 +1,103 @@ +use std::{fs::File, io, path::Path}; +use serde::{Deserialize, 
Serialize}; + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum Tag { + Basic, + Filter, + Function, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct TestSuite { + tests: Vec<TestCase>, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct TestCase { + pub name: String, + pub selector: String, + #[serde(default)] + pub document: serde_json::Value, + #[serde(default)] + pub result: Vec<serde_json::Value>, + #[serde(default)] + pub invalid_selector: bool, +} + +#[derive(Debug, Clone)] +pub struct TaggedTestCase { + pub tag: Tag, + pub test_case: TestCase, +} + +/// Read and tag test cases from the base jsonpath-compliance-test-suite path. +pub fn read_and_tag<P: AsRef<Path>>(path: P) -> Result<Vec<TaggedTestCase>, io::Error> { + let tests = path.as_ref().join("tests"); + let functions_tests = tests.join("functions"); + let whitespace_tests = tests.join("whitespace"); + + let basic = tests.join("basic.json"); + let filter = tests.join("filter.json"); + let index_selector = tests.join("index_selector.json"); + let name_selector = tests.join("name_selector.json"); + let slice_selector = tests.join("slice_selector.json"); + + let functions_count = functions_tests.join("count.json"); + let functions_length = functions_tests.join("length.json"); + let functions_match = functions_tests.join("match.json"); + let functions_search = functions_tests.join("search.json"); + let functions_value = functions_tests.join("value.json"); + + let whitespace_filter = whitespace_tests.join("filter.json"); + let whitespace_functions = whitespace_tests.join("functions.json"); + let whitespace_operators = whitespace_tests.join("operators.json"); + let whitespace_selectors = whitespace_tests.join("selectors.json"); + let whitespace_slice = whitespace_tests.join("slice.json"); + + let mut collection = TaggedTestCollection::new(); + + collection.read_file_and_tag(basic, Tag::Basic)?; + collection.read_file_and_tag(filter, Tag::Filter)?; + collection.read_file_and_tag(index_selector, Tag::Basic)?; +
collection.read_file_and_tag(name_selector, Tag::Basic)?; + collection.read_file_and_tag(slice_selector, Tag::Basic)?; + collection.read_file_and_tag(functions_count, Tag::Function)?; + collection.read_file_and_tag(functions_length, Tag::Function)?; + collection.read_file_and_tag(functions_match, Tag::Function)?; + collection.read_file_and_tag(functions_search, Tag::Function)?; + collection.read_file_and_tag(functions_value, Tag::Function)?; + collection.read_file_and_tag(whitespace_filter, Tag::Filter)?; + collection.read_file_and_tag(whitespace_functions, Tag::Function)?; + collection.read_file_and_tag(whitespace_operators, Tag::Filter)?; + collection.read_file_and_tag(whitespace_selectors, Tag::Basic)?; + collection.read_file_and_tag(whitespace_slice, Tag::Basic)?; + + Ok(collection.get()) +} + +struct TaggedTestCollection { + cases: Vec<TaggedTestCase>, +} + +impl TaggedTestCollection { + fn new() -> Self { + Self { cases: vec![] } + } + + fn read_file_and_tag<P: AsRef<Path>>(&mut self, file: P, tag: Tag) -> Result<(), io::Error> { + let file = File::open(file.as_ref())?; + let deser: TestSuite = serde_json::from_reader(file)?; + + for test_case in deser.tests { + self.cases.push(TaggedTestCase { tag, test_case }) + } + + Ok(()) + } + + fn get(self) -> Vec<TaggedTestCase> { + self.cases + } +} diff --git a/crates/rsonpath-test/tests/cts.rs b/crates/rsonpath-test/tests/cts.rs new file mode 100644 index 00000000..eac09237 --- /dev/null +++ b/crates/rsonpath-test/tests/cts.rs @@ -0,0 +1,72 @@ +use std::io; + +use rsonpath_test::{Tag, TaggedTestCase}; + +const CTS_PATH: &str = "jsonpath-compliance-test-suite"; + +#[test] +fn test_cts() -> Result<(), io::Error> { + let collection = rsonpath_test::read_and_tag(CTS_PATH)?; + let results: Vec<_> = collection.into_iter().map(test_one).collect(); + let mut success = true; + + for (name, result) in results { + match result { + TestResult::Passed => eprintln!("v {name} passed"), + TestResult::Ignored => eprintln!("?
{name} ignored"), + TestResult::Failed(err) => { + success = false; + eprintln!("x {name} failed\n{err}"); + } + } + } + + assert!(success); + + Ok(()) +} + +fn test_one(t: TaggedTestCase) -> (String, TestResult) { + let (tag, test_case) = (t.tag, t.test_case); + if !does_parser_support(tag) { + return (test_case.name, TestResult::Ignored); + } + + let parser_result = rsonpath_syntax::parse(&test_case.selector); + + if test_case.invalid_selector { + if parser_result.is_ok() { + let err = format!( + "test case {} is supposed to fail, but parser accepted the query\nparse result: {:?}", + test_case.name, + parser_result.unwrap() + ); + return (test_case.name, TestResult::Failed(err)); + } + return (test_case.name, TestResult::Passed); + } + + if parser_result.is_err() { + let err = format!( + "test case {} failed to parse\nparse error: {}", + test_case.name, + parser_result.unwrap_err() + ); + return (test_case.name, TestResult::Failed(err)); + } + + (test_case.name, TestResult::Passed) +} + +fn does_parser_support(tag: Tag) -> bool { + match tag { + Tag::Basic => true, + Tag::Filter | Tag::Function => false, + } +} + +enum TestResult { + Passed, + Ignored, + Failed(String), +} diff --git a/crates/rsonpath-test/tests/tests.rs b/crates/rsonpath-test/tests/tests.rs new file mode 100644 index 00000000..e619b871 --- /dev/null +++ b/crates/rsonpath-test/tests/tests.rs @@ -0,0 +1,2 @@ +mod cts; +mod generated; From 830303db395ad2b3772bcd5c49771970029b6fd9 Mon Sep 17 00:00:00 2001 From: Mateusz Gienieczko Date: Fri, 12 Jan 2024 16:56:25 +0100 Subject: [PATCH 2/5] fix: bug in `-c` graph display - dot format was temporarily broken by doubling double quotes in labels --- crates/rsonpath-lib/src/automaton.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/rsonpath-lib/src/automaton.rs b/crates/rsonpath-lib/src/automaton.rs index f5819ce6..9d03d5c9 100644 --- a/crates/rsonpath-lib/src/automaton.rs +++ b/crates/rsonpath-lib/src/automaton.rs @@ -76,7 +76,7 
@@ impl Display for TransitionLabel<'_> { #[inline(always)] fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - TransitionLabel::ObjectMember(name) => write!(f, "{}", name.quoted()), + TransitionLabel::ObjectMember(name) => write!(f, "{}", name.unquoted()), TransitionLabel::ArrayIndex(index) => write!(f, "{}", index.as_u64()), } } From 015fc0b8ed30643600dcaee7001679e7f0fdd793 Mon Sep 17 00:00:00 2001 From: Mateusz Gienieczko Date: Fri, 12 Jan 2024 16:58:09 +0100 Subject: [PATCH 3/5] fix: u001A-u001F in name selectors - Characters U+001A through U+001F were erroneously accepted unescaped. This is now a hard error. --- crates/rsonpath-syntax/src/parser.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/rsonpath-syntax/src/parser.rs b/crates/rsonpath-syntax/src/parser.rs index 85c653e5..35dcaaed 100644 --- a/crates/rsonpath-syntax/src/parser.rs +++ b/crates/rsonpath-syntax/src/parser.rs @@ -343,7 +343,7 @@ fn string<'a>(mode: StringParseMode) -> impl FnMut(&'a str) -> IResult<&'a str, Err(nom::Err::Failure(InternalParseError::SyntaxErrors(syntax_errors, rest))) }; } - (..='\u{0019}', _) => { + (..='\u{001F}', _) => { let rest = stream.peek().map_or("", |(i, _)| &q[*i..]); syntax_errors.push(SyntaxError::new( SyntaxErrorKind::InvalidUnescapedCharacter, From 91dd1977d82dde70b20a84717fac2c5802e84e66 Mon Sep 17 00:00:00 2001 From: Mateusz Gienieczko Date: Fri, 12 Jan 2024 16:58:37 +0100 Subject: [PATCH 4/5] feat!: added `Slice` selectors --- crates/rsonpath-lib/src/automaton/nfa.rs | 2 + crates/rsonpath-lib/src/error.rs | 8 + crates/rsonpath-syntax/src/error.rs | 18 +- crates/rsonpath-syntax/src/lib.rs | 220 ++++++++++++++++++++++- crates/rsonpath-syntax/src/num.rs | 18 ++ crates/rsonpath-syntax/src/parser.rs | 99 ++++++++-- 6 files changed, 341 insertions(+), 24 deletions(-) diff --git a/crates/rsonpath-lib/src/automaton/nfa.rs b/crates/rsonpath-lib/src/automaton/nfa.rs index 07309907..aed7f582 100644 --- 
a/crates/rsonpath-lib/src/automaton/nfa.rs +++ b/crates/rsonpath-lib/src/automaton/nfa.rs @@ -77,12 +77,14 @@ impl<'q> NondeterministicAutomaton<'q> { Selector::Wildcard => Ok(Direct(Transition::Wildcard)), Selector::Index(Index::FromStart(index)) => Ok(Direct(Transition::Labelled((*index).into()))), Selector::Index(Index::FromEnd(_)) => Err(UnsupportedFeatureError::indexing_from_end().into()), + Selector::Slice(_) => Err(UnsupportedFeatureError::slice_selector().into()), }, Segment::Descendant(selectors) if selectors.len() == 1 => match selectors.first() { Selector::Name(name) => Ok(Recursive(Transition::Labelled(name.into()))), Selector::Wildcard => Ok(Recursive(Transition::Wildcard)), Selector::Index(Index::FromStart(index)) => Ok(Recursive(Transition::Labelled((*index).into()))), Selector::Index(Index::FromEnd(_)) => Err(UnsupportedFeatureError::indexing_from_end().into()), + Selector::Slice(_) => Err(UnsupportedFeatureError::slice_selector().into()), }, _ => Err(UnsupportedFeatureError::multiple_selectors().into()), }) diff --git a/crates/rsonpath-lib/src/error.rs b/crates/rsonpath-lib/src/error.rs index 97e32f98..6373824d 100644 --- a/crates/rsonpath-lib/src/error.rs +++ b/crates/rsonpath-lib/src/error.rs @@ -146,6 +146,14 @@ impl UnsupportedFeatureError { Self::untracked("Indexing from End") } + /// Slice Selector – supporting slice selectors. + /// https://github.com/V0ldek/rsonpath/issues/152 + #[must_use] + #[inline(always)] + pub fn slice_selector() -> Self { + Self::tracked(152, "Slice Selector") + } + /// Returns the issue number on GitHub corresponding to the unsupported feature. /// Is [`None`] if the feature is not planned. 
#[must_use] diff --git a/crates/rsonpath-syntax/src/error.rs b/crates/rsonpath-syntax/src/error.rs index ef4d3197..f47f5aec 100644 --- a/crates/rsonpath-syntax/src/error.rs +++ b/crates/rsonpath-syntax/src/error.rs @@ -126,6 +126,9 @@ pub(crate) enum SyntaxErrorKind { NegativeZeroInteger, LeadingZeros, IndexParseError(JsonIntParseError), + SliceStartParseError(JsonIntParseError), + SliceEndParseError(JsonIntParseError), + SliceStepParseError(JsonIntParseError), } impl SyntaxError { @@ -266,9 +269,12 @@ impl SyntaxError { suggestion.remove(start_idx + offset, remove_len); } } - SyntaxErrorKind::InvalidSelector | SyntaxErrorKind::IndexParseError(_) | SyntaxErrorKind::EmptySelector => { - suggestion.invalidate() - } + SyntaxErrorKind::InvalidSelector + | SyntaxErrorKind::IndexParseError(_) + | SyntaxErrorKind::SliceStartParseError(_) + | SyntaxErrorKind::SliceStepParseError(_) + | SyntaxErrorKind::SliceEndParseError(_) + | SyntaxErrorKind::EmptySelector => suggestion.invalidate(), } // Generic notes. 
@@ -660,6 +666,9 @@ impl SyntaxErrorKind { Self::NegativeZeroInteger => "negative zero used as an integer".to_string(), Self::LeadingZeros => "integer with leading zeros".to_string(), Self::IndexParseError(_) => "invalid index value".to_string(), + Self::SliceStartParseError(_) => "invalid slice start".to_string(), + Self::SliceEndParseError(_) => "invalid slice end".to_string(), + Self::SliceStepParseError(_) => "invalid slice step value".to_string(), } } @@ -686,6 +695,9 @@ impl SyntaxErrorKind { Self::NegativeZeroInteger => "negative zero is not allowed".to_string(), Self::LeadingZeros => "leading zeros are not allowed".to_string(), Self::IndexParseError(inner) => format!("this index value is invalid; {inner}"), + Self::SliceStartParseError(inner) => format!("this start index is invalid; {inner}"), + Self::SliceEndParseError(inner) => format!("this end index is invalid; {inner}"), + Self::SliceStepParseError(inner) => format!("this step value is invalid; {inner}"), } } } diff --git a/crates/rsonpath-syntax/src/lib.rs b/crates/rsonpath-syntax/src/lib.rs index 4273dbc0..c03b552a 100644 --- a/crates/rsonpath-syntax/src/lib.rs +++ b/crates/rsonpath-syntax/src/lib.rs @@ -305,7 +305,7 @@ impl Parser { /// Every query is a sequence of zero or more of segments, /// each applying one or more selectors to a node and passing it along to the /// subsequent segments. -#[derive(Debug, PartialEq, Eq, Clone)] +#[derive(Debug, PartialEq, Eq, Clone, Hash)] #[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub enum Segment { /// A child segment contains a sequence of selectors, @@ -333,14 +333,14 @@ impl<'a> arbitrary::Arbitrary<'a> for Selectors { /// Collection of one or more [`Selector`] instances. /// /// Guaranteed to be non-empty. -#[derive(Debug, PartialEq, Eq, Clone)] +#[derive(Debug, PartialEq, Eq, Clone, Hash)] pub struct Selectors { inner: Vec<Selector>, } /// Each [`Segment`] defines one or more selectors.
/// A selector produces one or more children/descendants of the node it is applied to. -#[derive(Debug, PartialEq, Eq, Clone)] +#[derive(Debug, PartialEq, Eq, Clone, Hash)] #[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub enum Selector { /// A name selector selects at most one object member value under the key equal to the @@ -351,10 +351,13 @@ pub enum Selector { /// An index selector matches at most one array element value, /// depending on the selector's [`Index`]. Index(Index), + // A slice selector matches elements from arrays starting at a given index, + // ending at a given index, and incrementing with a specified step. + Slice(Slice), } /// Directional index into a JSON array. -#[derive(Debug, PartialEq, Eq, Clone)] +#[derive(Debug, PartialEq, Eq, Clone, Copy, Hash)] pub enum Index { /// Zero-based index from the start of the array. FromStart(num::JsonUInt), @@ -386,8 +389,187 @@ impl From<num::JsonInt> for Index { } } +/// Directional step offset within a JSON array. +#[derive(Debug, PartialEq, Eq, Clone, Copy, Hash)] +pub enum Step { + // Step forward by a given offset amount. + Forward(num::JsonUInt), + /// Step backward by a given offset amount. + Backward(num::JsonNonZeroUInt), +} + +// We don't derive this because Backward(0) is not a valid step. +#[cfg(feature = "arbitrary")] +#[cfg_attr(docsrs, doc(cfg(feature = "arbitrary")))] +impl<'a> arbitrary::Arbitrary<'a> for Step { + #[inline] + fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> { + let num = u.arbitrary::<num::JsonInt>()?; + Ok(Self::from(num)) + } +} + +impl From<num::JsonInt> for Step { + #[inline] + fn from(value: num::JsonInt) -> Self { + if value.as_i64() >= 0 { + Self::Forward(value.abs()) + } else { + Self::Backward(value.abs().try_into().expect("checked for zero already")) + } + } +} + +/// Slice selector defining the start and end bounds, as well as the step value and direction. +/// +/// The start index is inclusive defaults to `Index::FromStart(0)`.
+/// +/// The end index is exclusive and optional. +/// If `None`, the end of the slice depends on the step direction: +/// - if going forward, the end is `len` of the array; +/// - if going backward, the end is 0. +/// +/// The step defaults to `Step::Forward(1)`. Note that `Step::Forward(0)` is a valid +/// value and is specified to result in an empty slice, regardless of `start` and `end`. +/// +/// # Examples +/// ``` +/// # use rsonpath_syntax::{Slice, Index, Step, num::JsonUInt}; +/// let slice = Slice::default(); +/// assert_eq!(slice.start(), Index::FromStart(JsonUInt::ZERO)); +/// assert_eq!(slice.end(), None); +/// assert_eq!(slice.step(), Step::Forward(JsonUInt::ONE)); +/// ``` +#[derive(Debug, PartialEq, Eq, Clone, Hash)] +#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] +pub struct Slice { + start: Index, + end: Option<Index>, + step: Step, +} + +/// Helper API for programmatically constructing [`Slice`] instances. +/// +/// # Examples +/// ``` +/// # use rsonpath_syntax::{Slice, SliceBuilder, Index, Step, num::JsonUInt}; +/// let mut builder = SliceBuilder::new(); +/// +/// builder +/// .with_start(Index::FromEnd(3.try_into().unwrap())) +/// .with_end(Index::FromStart(1.into())) +/// .with_step(Step::Backward(7.try_into().unwrap())); +/// +/// let slice: Slice = builder.into(); +/// assert_eq!(slice.to_string(), "-3:1:-7"); +/// ``` +pub struct SliceBuilder { + inner: Slice, +} + +impl Slice { + const DEFAULT_START: Index = Index::FromStart(num::JsonUInt::ZERO); + const DEFAULT_STEP: Step = Step::Forward(num::JsonUInt::ONE); + + /// Create a new [`Slice`] from given bounds and step. + #[inline(always)] + #[must_use] + pub fn new(start: Index, end: Option<Index>, step: Step) -> Self { + Self { start, end, step } + } + + /// Get the start index of the [`Slice`]. + #[inline(always)] + #[must_use] + pub fn start(&self) -> Index { + self.start + } + + /// Get the end index of the [`Slice`].
+ #[inline(always)] + #[must_use] + pub fn end(&self) -> Option<Index> { + self.end + } + + /// Get the step of the [`Slice`]. + #[inline(always)] + #[must_use] + pub fn step(&self) -> Step { + self.step + } +} + +impl Default for Slice { + #[inline] + fn default() -> Self { + Self { + start: Index::FromStart(0.into()), + end: None, + step: Step::Forward(1.into()), + } + } +} + +impl SliceBuilder { + /// Create a new [`Slice`] configuration with default values. + #[inline] + #[must_use] + pub fn new() -> Self { + Self { + inner: Slice::default(), + } + } + + /// Set the start of the [`Slice`]. + #[inline] + pub fn with_start(&mut self, start: Index) -> &mut Self { + self.inner.start = start; + self + } + + /// Set the end of the [`Slice`]. + #[inline] + pub fn with_end(&mut self, end: Index) -> &mut Self { + self.inner.end = Some(end); + self + } + + /// Set the step of the [`Slice`]. + #[inline] + pub fn with_step(&mut self, step: Step) -> &mut Self { + self.inner.step = step; + self + } + + /// Get the configured [`Slice`] instance. + /// + /// This does not consume the builder. For a consuming variant use the `Into` impl. + #[inline] + #[must_use] + pub fn to_slice(&mut self) -> Slice { + self.inner.clone() + } +} + +impl From<SliceBuilder> for Slice { + #[inline] + #[must_use] + fn from(value: SliceBuilder) -> Self { + value.inner + } +} + +impl Default for SliceBuilder { + #[inline(always)] + #[must_use] + fn default() -> Self { + Self::new() + } +} + /// JSONPath query structure represented by a sequence of [`Segments`](Segment).
-#[derive(Debug, PartialEq, Eq, Clone)] +#[derive(Debug, PartialEq, Eq, Clone, Hash)] #[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub struct JsonPathQuery { segments: Vec<Segment>, @@ -604,6 +786,7 @@ impl Display for Selector { Self::Name(n) => write!(f, "'{}'", str::escape(n.unquoted(), str::EscapeMode::SingleQuoted)), Self::Wildcard => write!(f, "*"), Self::Index(idx) => write!(f, "{idx}"), + Self::Slice(slice) => write!(f, "{slice}"), } } } @@ -618,6 +801,33 @@ impl Display for Index { } } +impl Display for Step { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Forward(idx) => write!(f, "{idx}"), + Self::Backward(idx) => write!(f, "-{idx}"), + } + } +} + +impl Display for Slice { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if self.start != Self::DEFAULT_START { + write!(f, "{}", self.start)?; + } + write!(f, ":")?; + if let Some(end) = self.end { + write!(f, "{end}")?; + } + if self.step != Self::DEFAULT_STEP { + write!(f, ":{}", self.step)?; + } + Ok(()) + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/crates/rsonpath-syntax/src/num.rs b/crates/rsonpath-syntax/src/num.rs index 250de107..87159141 100644 --- a/crates/rsonpath-syntax/src/num.rs +++ b/crates/rsonpath-syntax/src/num.rs @@ -129,6 +129,15 @@ impl JsonInt { /// ``` pub const ZERO: Self = Self::new(0); + /// A constant value of one. + /// + /// # Examples + /// ``` + /// # use rsonpath_syntax::num::JsonInt; + /// assert_eq!(JsonInt::ONE.as_i64(), 1); + /// ``` + pub const ONE: Self = Self::new(1); + /// A constant for the smallest expressible value. /// /// # Examples @@ -246,6 +255,15 @@ impl JsonUInt { /// ``` pub const ZERO: Self = Self::new(0); + /// A constant value of one. + /// + /// # Examples + /// ``` + /// # use rsonpath_syntax::num::JsonUInt; + /// assert_eq!(JsonUInt::ONE.as_u64(), 1); + /// ``` + pub const ONE: Self = Self::new(1); + /// A constant for the largest expressible value.
/// /// # Examples diff --git a/crates/rsonpath-syntax/src/parser.rs b/crates/rsonpath-syntax/src/parser.rs index 35dcaaed..864ec9d1 100644 --- a/crates/rsonpath-syntax/src/parser.rs +++ b/crates/rsonpath-syntax/src/parser.rs @@ -1,8 +1,8 @@ use crate::{ error::{InternalParseError, ParseErrorBuilder, SyntaxError, SyntaxErrorKind}, - num::{JsonInt, JsonUInt}, + num::{error::JsonIntParseError, JsonInt, JsonNonZeroUInt, JsonUInt}, str::{JsonString, JsonStringBuilder}, - Index, JsonPathQuery, ParserOptions, Result, Segment, Selector, Selectors, + Index, JsonPathQuery, ParserOptions, Result, Segment, Selector, Selectors, SliceBuilder, Step, }; use nom::{branch::*, bytes::complete::*, character::complete::*, combinator::*, multi::*, sequence::*, *}; use std::{iter::Peekable, str::FromStr}; @@ -225,6 +225,7 @@ fn selector(q: &str) -> IResult<&str, Selector, InternalParseError> { alt(( ignore_whitespace(name_selector), ignore_whitespace(wildcard_selector), + ignore_whitespace(slice_selector), ignore_whitespace(index_selector), failed_selector, ))(q) @@ -250,22 +251,62 @@ fn wildcard_selector(q: &str) -> IResult<&str, Selector, InternalParseError> { map(tag("*"), |_| Selector::Wildcard)(q) } -fn index_selector(q: &str) -> IResult<&str, Selector, InternalParseError> { - let (rest, int) = int(q)?; - match JsonInt::from_str(int) { - Ok(int) => { - if let Ok(uint) = JsonUInt::try_from(int) { - Ok((rest, Selector::Index(Index::FromStart(uint)))) - } else { - Ok(( +fn slice_selector(q: &str) -> IResult<&str, Selector, InternalParseError> { + let (rest, opt_start) = terminated(opt(int), ignore_whitespace(char(':')))(q)?; + // We have parsed a ':', so this *must* be a slice selector. Any errors after here are fatal. 
+ let mut slice = SliceBuilder::new(); + + if let Some(start_str) = opt_start { + match parse_directional_int(start_str) { + DirectionalInt::Plus(int) => slice.with_start(Index::FromStart(int)), + DirectionalInt::Minus(int) => slice.with_start(Index::FromEnd(int)), + DirectionalInt::Error(err) => { + return fail( + SyntaxErrorKind::SliceStartParseError(err), + q.len(), + start_str.len(), rest, - Selector::Index(Index::FromEnd( - int.abs().try_into().expect("zero would convert to JsonUInt above"), - )), - )) + ); } - } - Err(err) => Err(Err::Failure(InternalParseError::SyntaxError( + }; + } + let q = rest; + let (rest, opt_end) = opt(ignore_whitespace(int))(q)?; + + if let Some(end_str) = opt_end { + match parse_directional_int(end_str) { + DirectionalInt::Plus(int) => slice.with_end(Index::FromStart(int)), + DirectionalInt::Minus(int) => slice.with_end(Index::FromEnd(int)), + DirectionalInt::Error(err) => { + return fail(SyntaxErrorKind::SliceEndParseError(err), q.len(), end_str.len(), rest); + } + }; + } + + let q = rest; + let (rest, opt_step) = opt(ignore_whitespace(preceded(char(':'), opt(ignore_whitespace(int)))))(q)?; + + if let Some(Some(step_str)) = opt_step { + match parse_directional_int(step_str) { + DirectionalInt::Plus(int) => slice.with_step(Step::Forward(int)), + DirectionalInt::Minus(int) => slice.with_step(Step::Backward(int)), + DirectionalInt::Error(err) => { + return fail(SyntaxErrorKind::SliceStepParseError(err), q.len(), step_str.len(), rest); + } + }; + } + + Ok((rest, Selector::Slice(slice.into()))) +} + +fn index_selector(q: &str) -> IResult<&str, Selector, InternalParseError> { + // This has to be called after the slice selector. + // Thanks to that we can make a hard cut if we parsed an integer but it doesn't work as an index. 
+ let (rest, int) = int(q)?; + match parse_directional_int(int) { + DirectionalInt::Plus(int) => Ok((rest, Selector::Index(Index::FromStart(int)))), + DirectionalInt::Minus(int) => Ok((rest, Selector::Index(Index::FromEnd(int)))), + DirectionalInt::Error(err) => Err(Err::Failure(InternalParseError::SyntaxError( SyntaxError::new(SyntaxErrorKind::IndexParseError(err), q.len(), int.len()), rest, ))), @@ -295,6 +336,25 @@ fn failed_selector(q: &str) -> IResult<&str, Selector, InternalParseError> { ))) } +enum DirectionalInt { + Plus(JsonUInt), + Minus(JsonNonZeroUInt), + Error(JsonIntParseError), +} + +fn parse_directional_int(int_str: &str) -> DirectionalInt { + match JsonInt::from_str(int_str) { + Ok(int) => { + if let Ok(uint) = JsonUInt::try_from(int) { + DirectionalInt::Plus(uint) + } else { + DirectionalInt::Minus(int.abs().try_into().expect("zero would convert to JsonUInt above")) + } + } + Err(err) => DirectionalInt::Error(err), + } +} + fn int(q: &str) -> IResult<&str, &str, InternalParseError> { let (rest, int) = recognize(alt((preceded(char('-'), digit1), digit1)))(q)?; @@ -485,6 +545,13 @@ fn string<'a>(mode: StringParseMode) -> impl FnMut(&'a str) -> IResult<&'a str, } } +fn fail<T>(kind: SyntaxErrorKind, rev_idx: usize, err_len: usize, rest: &str) -> IResult<&str, T, InternalParseError> { + Err(Err::Failure(InternalParseError::SyntaxError( + SyntaxError::new(kind, rev_idx, err_len), + rest, + ))) +} + #[cfg(test)] mod tests { use crate::{str::JsonString, Index, Selector}; From 969bbc74eb5a323f08124c815a922ed231937856 Mon Sep 17 00:00:00 2001 From: Mateusz Gienieczko Date: Mon, 15 Jan 2024 12:01:15 +0100 Subject: [PATCH 5/5] feat: parsing for the slice selector --- crates/rsonpath-lib/src/error.rs | 2 +- crates/rsonpath-syntax/src/builder.rs | 70 +++++- crates/rsonpath-syntax/src/lib.rs | 23 +- crates/rsonpath-syntax/src/parser.rs | 98 ++++++-- .../tests/query_parser_tests.rs | 229 +----------------- 5 files changed, 154 insertions(+), 268 deletions(-) diff
--git a/crates/rsonpath-lib/src/error.rs b/crates/rsonpath-lib/src/error.rs index 6373824d..a055ae7e 100644 --- a/crates/rsonpath-lib/src/error.rs +++ b/crates/rsonpath-lib/src/error.rs @@ -147,7 +147,7 @@ impl UnsupportedFeatureError { } /// Slice Selector – supporting slice selectors. - /// https://github.com/V0ldek/rsonpath/issues/152 + /// <https://github.com/V0ldek/rsonpath/issues/152> #[must_use] #[inline(always)] pub fn slice_selector() -> Self { diff --git a/crates/rsonpath-syntax/src/builder.rs b/crates/rsonpath-syntax/src/builder.rs index 110ae5bd..2e012db0 100644 --- a/crates/rsonpath-syntax/src/builder.rs +++ b/crates/rsonpath-syntax/src/builder.rs @@ -1,6 +1,6 @@ //! Utility for building a [`JsonPathQuery`](`crate::JsonPathQuery`) //! programmatically. -use crate::{num::JsonInt, str::JsonString, Index, JsonPathQuery, Segment, Selector, Selectors}; +use crate::{num::JsonInt, str::JsonString, Index, JsonPathQuery, Segment, Selector, Selectors, SliceBuilder}; /// Builder for [`JsonPathQuery`] instances. /// @@ -13,12 +13,13 @@ use crate::{num::JsonInt, str::JsonString, Index, JsonPathQuery, Segment, Select /// .descendant_name("b") /// .child_wildcard() /// .child_name("c") -/// .descendant_wildcard(); +/// .descendant_wildcard() +/// .child_slice(|x| x.with_start(3).with_end(-7).with_step(2)); /// /// // Can also use `builder.build()` as a non-consuming version. /// let query: JsonPathQuery = builder.into(); /// -/// assert_eq!(query.to_string(), "$['a']..['b'][*]['c']..[*]"); +/// assert_eq!(query.to_string(), "$['a']..['b'][*]['c']..[*][3:-7:2]"); /// ``` pub struct JsonPathQueryBuilder { segments: Vec<Segment>, @@ -133,6 +134,17 @@ impl JsonPathQueryBuilder { self.child(|x| x.index(idx)) } + /// Add a child segment with a single slice selector. + /// + /// This is a shorthand for `.child(|x| x.slice(slice_builder))`.
+ #[inline(always)] + pub fn child_slice(&mut self, slice_builder: F) -> &mut Self + where + F: FnOnce(&mut SliceBuilder) -> &mut SliceBuilder, + { + self.child(|x| x.slice(slice_builder)) + } + /// Add a descendant segment with a single name selector. /// /// This is a shorthand for `.descendant(|x| x.name(name))`. @@ -157,6 +169,17 @@ impl JsonPathQueryBuilder { self.descendant(|x| x.index(idx)) } + /// Add a descendant segment with a single slice selector. + /// + /// This is a shorthand for `.descendant(|x| x.slice(slice_builder))`. + #[inline(always)] + pub fn descendant_slice(&mut self, slice_builder: F) -> &mut Self + where + F: FnOnce(&mut SliceBuilder) -> &mut SliceBuilder, + { + self.descendant(|x| x.slice(slice_builder)) + } + /// Produce a [`JsonPathQuery`] from the builder. /// /// This clones all data in the builder to create the query. @@ -225,6 +248,47 @@ impl JsonPathSelectorsBuilder { self } + /// Add a slice selector based on a given start, end, and step integers. + /// + /// The result is a [`Selector::Slice`] with given `start`, `end`, and `step`. 
+ /// + /// ## Examples + /// + /// ```rust + /// # use rsonpath_syntax::{Selector, SliceBuilder, Index, Step, num::{JsonNonZeroUInt, JsonUInt}, builder::JsonPathQueryBuilder}; + /// let mut builder = JsonPathQueryBuilder::new(); + /// builder.child(|x| x + /// .slice(|s| s.with_start(10).with_end(-20).with_step(5)) + /// .slice(|s| s.with_start(-20).with_step(-30))); + /// let result = builder.into_query(); + /// + /// assert_eq!(result.segments().len(), 1); + /// let segment = &result.segments()[0]; + /// let selectors = segment.selectors().as_slice(); + /// match (&selectors[0], &selectors[1]) { + /// (Selector::Slice(s1), Selector::Slice(s2)) => { + /// assert_eq!(s1.start(), Index::FromStart(10.into())); + /// assert_eq!(s1.end(), Some(Index::FromEnd(JsonNonZeroUInt::try_from(20).unwrap()))); + /// assert_eq!(s1.step(), Step::Forward(5.into())); + /// assert_eq!(s2.start(), Index::FromEnd(JsonNonZeroUInt::try_from(20).unwrap())); + /// assert_eq!(s2.end(), None); + /// assert_eq!(s2.step(), Step::Backward(JsonNonZeroUInt::try_from(30).unwrap())); + /// } + /// _ => unreachable!() + /// } + /// ``` + #[inline(always)] + pub fn slice(&mut self, slice_builder: F) -> &mut Self + where + F: FnOnce(&mut SliceBuilder) -> &mut SliceBuilder, + { + let mut slice = SliceBuilder::new(); + slice_builder(&mut slice); + let slice = slice.into(); + self.selectors.push(Selector::Slice(slice)); + self + } + /// Add a wildcard selector. #[inline(always)] pub fn wildcard(&mut self) -> &mut Self { diff --git a/crates/rsonpath-syntax/src/lib.rs b/crates/rsonpath-syntax/src/lib.rs index c03b552a..ed811ba7 100644 --- a/crates/rsonpath-syntax/src/lib.rs +++ b/crates/rsonpath-syntax/src/lib.rs @@ -351,8 +351,8 @@ pub enum Selector { /// An index selector matches at most one array element value, /// depending on the selector's [`Index`]. 
Index(Index), - // A slice selector matches elements from arrays starting at a given index, - // ending at a given index, and incrementing with a specified step. + /// A slice selector matches elements from arrays starting at a given index, + /// ending at a given index, and incrementing with a specified step. Slice(Slice), } @@ -392,7 +392,7 @@ impl From for Index { /// Directional step offset within a JSON array. #[derive(Debug, PartialEq, Eq, Clone, Copy, Hash)] pub enum Step { - // Step forward by a given offset amount. + /// Step forward by a given offset amount. Forward(num::JsonUInt), /// Step backward by a given offset amount. Backward(num::JsonNonZeroUInt), @@ -455,10 +455,7 @@ pub struct Slice { /// # use rsonpath_syntax::{Slice, SliceBuilder, Index, Step, num::JsonUInt}; /// let mut builder = SliceBuilder::new(); /// -/// builder -/// .with_start(Index::FromEnd(3.try_into().unwrap())) -/// .with_end(Index::FromStart(1.into())) -/// .with_step(Step::Backward(7.try_into().unwrap())); +/// builder.with_start(-3).with_end(1).with_step(-7); /// /// let slice: Slice = builder.into(); /// assert_eq!(slice.to_string(), "-3:1:-7"); @@ -523,22 +520,22 @@ impl SliceBuilder { /// Set the start of the [`Slice`]. #[inline] - pub fn with_start(&mut self, start: Index) -> &mut Self { - self.inner.start = start; + pub fn with_start>(&mut self, start: N) -> &mut Self { + self.inner.start = start.into().into(); self } /// Set the end of the [`Slice`]. #[inline] - pub fn with_end(&mut self, end: Index) -> &mut Self { - self.inner.end = Some(end); + pub fn with_end>(&mut self, end: N) -> &mut Self { + self.inner.end = Some(end.into().into()); self } /// Set the step of the [`Slice`]. 
#[inline] - pub fn with_step(&mut self, step: Step) -> &mut Self { - self.inner.step = step; + pub fn with_step>(&mut self, step: N) -> &mut Self { + self.inner.step = step.into().into(); self } diff --git a/crates/rsonpath-syntax/src/parser.rs b/crates/rsonpath-syntax/src/parser.rs index 864ec9d1..0dc46a23 100644 --- a/crates/rsonpath-syntax/src/parser.rs +++ b/crates/rsonpath-syntax/src/parser.rs @@ -2,7 +2,7 @@ use crate::{ error::{InternalParseError, ParseErrorBuilder, SyntaxError, SyntaxErrorKind}, num::{error::JsonIntParseError, JsonInt, JsonNonZeroUInt, JsonUInt}, str::{JsonString, JsonStringBuilder}, - Index, JsonPathQuery, ParserOptions, Result, Segment, Selector, Selectors, SliceBuilder, Step, + Index, JsonPathQuery, ParserOptions, Result, Segment, Selector, Selectors, Step, }; use nom::{branch::*, bytes::complete::*, character::complete::*, combinator::*, multi::*, sequence::*, *}; use std::{iter::Peekable, str::FromStr}; @@ -254,12 +254,12 @@ fn wildcard_selector(q: &str) -> IResult<&str, Selector, InternalParseError> { fn slice_selector(q: &str) -> IResult<&str, Selector, InternalParseError> { let (rest, opt_start) = terminated(opt(int), ignore_whitespace(char(':')))(q)?; // We have parsed a ':', so this *must* be a slice selector. Any errors after here are fatal. 
- let mut slice = SliceBuilder::new(); + let mut slice = crate::Slice::default(); if let Some(start_str) = opt_start { match parse_directional_int(start_str) { - DirectionalInt::Plus(int) => slice.with_start(Index::FromStart(int)), - DirectionalInt::Minus(int) => slice.with_start(Index::FromEnd(int)), + DirectionalInt::Plus(int) => slice.start = Index::FromStart(int), + DirectionalInt::Minus(int) => slice.start = Index::FromEnd(int), DirectionalInt::Error(err) => { return fail( SyntaxErrorKind::SliceStartParseError(err), @@ -275,8 +275,8 @@ fn slice_selector(q: &str) -> IResult<&str, Selector, InternalParseError> { if let Some(end_str) = opt_end { match parse_directional_int(end_str) { - DirectionalInt::Plus(int) => slice.with_end(Index::FromStart(int)), - DirectionalInt::Minus(int) => slice.with_end(Index::FromEnd(int)), + DirectionalInt::Plus(int) => slice.end = Some(Index::FromStart(int)), + DirectionalInt::Minus(int) => slice.end = Some(Index::FromEnd(int)), DirectionalInt::Error(err) => { return fail(SyntaxErrorKind::SliceEndParseError(err), q.len(), end_str.len(), rest); } @@ -288,15 +288,15 @@ fn slice_selector(q: &str) -> IResult<&str, Selector, InternalParseError> { if let Some(Some(step_str)) = opt_step { match parse_directional_int(step_str) { - DirectionalInt::Plus(int) => slice.with_step(Step::Forward(int)), - DirectionalInt::Minus(int) => slice.with_step(Step::Backward(int)), + DirectionalInt::Plus(int) => slice.step = Step::Forward(int), + DirectionalInt::Minus(int) => slice.step = Step::Backward(int), DirectionalInt::Error(err) => { return fail(SyntaxErrorKind::SliceStepParseError(err), q.len(), step_str.len(), rest); } }; } - Ok((rest, Selector::Slice(slice.into()))) + Ok((rest, Selector::Slice(slice))) } fn index_selector(q: &str) -> IResult<&str, Selector, InternalParseError> { @@ -655,11 +655,13 @@ mod tests { #[derive(Debug, Clone)] enum SelectorTag { WildcardChild, - Child(String), - WildcardDescendant, - Descendant(String), + 
NameChild(String), ArrayIndexChild(JsonUInt), + ArraySliceChild(JsonUInt, Option, JsonUInt), + WildcardDescendant, + NameDescendant(String), ArrayIndexDescendant(JsonUInt), + ArraySliceDescendant(JsonUInt, Option, JsonUInt), } #[derive(Debug, Clone)] @@ -724,11 +726,13 @@ mod tests { fn any_selector() -> impl Strategy { prop_oneof![ any_wildcard_child(), - child_any(), - any_wildcard_descendant(), - descendant_any(), + any_child_name(), any_array_index_child(), + any_array_slice_child(), + any_wildcard_descendant(), + any_descendant_name(), any_array_index_descendant(), + any_array_slice_descendant(), ] } @@ -749,18 +753,18 @@ mod tests { } // .label or ['label'] - fn child_any() -> impl Strategy { + fn any_child_name() -> impl Strategy { prop_oneof![any_short_name().prop_map(|x| (format!(".{x}"), x)), any_name(),].prop_map(|(s, l)| Selector { string: s, - tag: SelectorTag::Child(l), + tag: SelectorTag::NameChild(l), }) } // ..label or ..['label'] - fn descendant_any() -> impl Strategy { + fn any_descendant_name() -> impl Strategy { prop_oneof![any_short_name().prop_map(|x| (x.clone(), x)), any_name(),].prop_map(|(x, l)| Selector { string: format!("..{x}"), - tag: SelectorTag::Descendant(l), + tag: SelectorTag::NameDescendant(l), }) } @@ -771,6 +775,22 @@ mod tests { }) } + fn any_array_slice_child() -> impl Strategy { + ( + any_non_negative_array_index(), + proptest::option::of(any_non_negative_array_index()), + any_non_negative_array_index(), + ) + .prop_map(|(start, end, step)| Selector { + string: if let Some(end) = end { + format!("[{}:{}:{}]", start.as_u64(), end.as_u64(), step.as_u64()) + } else { + format!("[{}::{}]", start.as_u64(), step.as_u64()) + }, + tag: SelectorTag::ArraySliceChild(start, end, step), + }) + } + fn any_array_index_descendant() -> impl Strategy { any_non_negative_array_index().prop_map(|i| Selector { string: format!("..[{}]", i.as_u64()), @@ -778,6 +798,22 @@ mod tests { }) } + fn any_array_slice_descendant() -> impl Strategy { + ( + 
any_non_negative_array_index(), + proptest::option::of(any_non_negative_array_index()), + any_non_negative_array_index(), + ) + .prop_map(|(start, end, step)| Selector { + string: if let Some(end) = end { + format!("..[{}:{}:{}]", start.as_u64(), end.as_u64(), step.as_u64()) + } else { + format!("..[{}::{}]", start.as_u64(), step.as_u64()) + }, + tag: SelectorTag::ArraySliceDescendant(start, end, step), + }) + } + fn any_short_name() -> impl Strategy { r"([A-Za-z]|_|[^\u0000-\u007F])([A-Za-z0-9]|_|[^\u0000-\u007F])*" } @@ -831,12 +867,20 @@ mod tests { result += &selector.string; match selector.tag { + SelectorTag::NameChild(name) => query.child_name(JsonString::new(&name)), + SelectorTag::ArrayIndexChild(idx) => query.child_index(idx), + SelectorTag::ArraySliceChild(start, None, step) => + query.child_slice(|x| x.with_start(start).with_step(step)), + SelectorTag::ArraySliceChild(start, Some(end), step) => + query.child_slice(|x| x.with_start(start).with_end(end).with_step(step)), SelectorTag::WildcardChild => query.child_wildcard(), - SelectorTag::Child(name) => query.child_name(JsonString::new(&name)), + SelectorTag::NameDescendant(name) => query.descendant_name(JsonString::new(&name)), + SelectorTag::ArrayIndexDescendant(idx) => query.descendant_index(idx), + SelectorTag::ArraySliceDescendant(start, None, step) => + query.descendant_slice(|x| x.with_start(start).with_step(step)), + SelectorTag::ArraySliceDescendant(start, Some(end), step) => + query.descendant_slice(|x| x.with_start(start).with_end(end).with_step(step)), SelectorTag::WildcardDescendant => query.descendant_wildcard(), - SelectorTag::Descendant(name) => query.descendant_name(JsonString::new(&name)), - SelectorTag::ArrayIndexChild(idx) => query.child_index(idx), - SelectorTag::ArrayIndexDescendant(idx) => query.descendant_index(idx) }; } @@ -855,6 +899,14 @@ mod tests { assert_eq!(expected, result); } + + #[test] + fn round_trip((_, query) in any_valid_query()) { + let input = query.to_string(); + 
let result = crate::parse(&input).expect("expected Ok"); + + assert_eq!(query, result); + } } } } diff --git a/crates/rsonpath-syntax/tests/query_parser_tests.rs b/crates/rsonpath-syntax/tests/query_parser_tests.rs index 7d7d800a..3247ca1a 100644 --- a/crates/rsonpath-syntax/tests/query_parser_tests.rs +++ b/crates/rsonpath-syntax/tests/query_parser_tests.rs @@ -1,5 +1,5 @@ use pretty_assertions::assert_eq; -use rsonpath_syntax::{builder::JsonPathQueryBuilder, num::JsonUInt, str::JsonString, JsonPathQuery}; +use rsonpath_syntax::{builder::JsonPathQueryBuilder, num::JsonUInt, str::JsonString}; use test_case::test_case; #[test] @@ -166,230 +166,3 @@ fn name_and_wildcard_selectors_bracketed_and_raw() { assert_eq!(result, expected_query); } - -mod proptests { - use super::*; - use proptest::{prelude::*, sample::SizeRange}; - use rsonpath_syntax::num::JsonUInt; - - /* Approach: we generate a sequence of Selectors, each having its generated string - * and a tag describing what selector it represents, and, optionally, what string is attached. - * This can then easily be turned into the input (the string is attached) and the expected - * parser result (transform the sequence of tags). 
- */ - - #[derive(Debug, Clone)] - enum SelectorTag { - WildcardChild, - Child(String), - WildcardDescendant, - Descendant(String), - ArrayIndexChild(JsonUInt), - ArrayIndexDescendant(JsonUInt), - } - - #[derive(Debug, Clone)] - struct Selector { - string: String, - tag: SelectorTag, - } - - #[derive(Debug, PartialEq, Eq, Clone, Copy)] - enum JsonStringToken { - EncodeNormally(char), - ForceUnicodeEscape(char), - } - - #[derive(Debug, PartialEq, Eq, Clone, Copy)] - enum JsonStringTokenEncodingMode { - SingleQuoted, - DoubleQuoted, - } - - impl JsonStringToken { - fn raw(self) -> char { - match self { - Self::EncodeNormally(x) | Self::ForceUnicodeEscape(x) => x, - } - } - - fn encode(self, mode: JsonStringTokenEncodingMode) -> String { - return match self { - JsonStringToken::EncodeNormally('\u{0008}') => r"\b".to_owned(), - JsonStringToken::EncodeNormally('\t') => r"\t".to_owned(), - JsonStringToken::EncodeNormally('\n') => r"\n".to_owned(), - JsonStringToken::EncodeNormally('\u{000C}') => r"\f".to_owned(), - JsonStringToken::EncodeNormally('\r') => r"\r".to_owned(), - JsonStringToken::EncodeNormally('"') => match mode { - JsonStringTokenEncodingMode::DoubleQuoted => r#"\""#.to_owned(), - JsonStringTokenEncodingMode::SingleQuoted => r#"""#.to_owned(), - }, - JsonStringToken::EncodeNormally('\'') => match mode { - JsonStringTokenEncodingMode::DoubleQuoted => r#"'"#.to_owned(), - JsonStringTokenEncodingMode::SingleQuoted => r#"\'"#.to_owned(), - }, - JsonStringToken::EncodeNormally('/') => r"\/".to_owned(), - JsonStringToken::EncodeNormally('\\') => r"\\".to_owned(), - JsonStringToken::EncodeNormally(c @ ..='\u{001F}') => encode_unicode_escape(c), - JsonStringToken::EncodeNormally(c) => c.to_string(), - JsonStringToken::ForceUnicodeEscape(c) => encode_unicode_escape(c), - }; - - fn encode_unicode_escape(c: char) -> String { - let mut buf = [0; 2]; - let enc = c.encode_utf16(&mut buf); - let mut res = String::new(); - for x in enc { - res += &format!("\\u{x:0>4x}"); - 
} - res - } - } - } - - // Cspell: disable - fn any_selector() -> impl Strategy { - prop_oneof![ - any_wildcard_child(), - child_any(), - any_wildcard_descendant(), - descendant_any(), - any_array_index_child(), - any_array_index_descendant(), - ] - } - - // .* or [*] - fn any_wildcard_child() -> impl Strategy { - r"(\.\*|\[\*\])".prop_map(|x| Selector { - string: x, - tag: SelectorTag::WildcardChild, - }) - } - - // ..* or ..[*] - fn any_wildcard_descendant() -> impl Strategy { - r"(\*|\[\*\])".prop_map(|x| Selector { - string: format!("..{x}"), - tag: SelectorTag::WildcardDescendant, - }) - } - - // .label or ['label'] - fn child_any() -> impl Strategy { - prop_oneof![any_short_name().prop_map(|x| (format!(".{x}"), x)), any_name(),].prop_map(|(s, l)| Selector { - string: s, - tag: SelectorTag::Child(l), - }) - } - - // ..label or ..['label'] - fn descendant_any() -> impl Strategy { - prop_oneof![any_short_name().prop_map(|x| (x.clone(), x)), any_name(),].prop_map(|(x, l)| Selector { - string: format!("..{x}"), - tag: SelectorTag::Descendant(l), - }) - } - - fn any_array_index_child() -> impl Strategy { - any_non_negative_array_index().prop_map(|i| Selector { - string: format!("[{}]", i.as_u64()), - tag: SelectorTag::ArrayIndexChild(i), - }) - } - - fn any_array_index_descendant() -> impl Strategy { - any_non_negative_array_index().prop_map(|i| Selector { - string: format!("..[{}]", i.as_u64()), - tag: SelectorTag::ArrayIndexDescendant(i), - }) - } - - fn any_short_name() -> impl Strategy { - r"([A-Za-z]|_|[^\u0000-\u007F])([A-Za-z0-9]|_|[^\u0000-\u007F])*" - } - - fn any_name() -> impl Strategy { - prop_oneof![ - Just(JsonStringTokenEncodingMode::SingleQuoted), - Just(JsonStringTokenEncodingMode::DoubleQuoted) - ] - .prop_flat_map(|mode| { - prop::collection::vec( - (prop::char::any(), prop::bool::ANY).prop_map(|(c, b)| { - if b { - JsonStringToken::EncodeNormally(c) - } else { - JsonStringToken::ForceUnicodeEscape(c) - } - }), - SizeRange::default(), - ) - 
.prop_map(move |v| { - let q = match mode { - JsonStringTokenEncodingMode::SingleQuoted => '\'', - JsonStringTokenEncodingMode::DoubleQuoted => '"', - }; - let mut s = String::new(); - let mut l = String::new(); - for x in v { - s += &x.encode(mode); - l.push(x.raw()); - } - (format!("[{q}{s}{q}]"), l) - }) - }) - } - - fn any_non_negative_array_index() -> impl Strategy { - const MAX: u64 = (1 << 53) - 1; - (0..MAX).prop_map(|x| JsonUInt::try_from(x).expect("in-range JsonUInt")) - } - // Cspell: enable - - prop_compose! { - fn any_valid_query()(selectors in prop::collection::vec(any_selector(), 0..20)) -> (String, JsonPathQuery) { - let mut result: String = String::new(); - let mut query = JsonPathQueryBuilder::new(); - - result += "$"; - - for selector in selectors { - result += &selector.string; - - match selector.tag { - SelectorTag::WildcardChild => query.child_wildcard(), - SelectorTag::Child(name) => query.child_name(JsonString::new(&name)), - SelectorTag::WildcardDescendant => query.descendant_wildcard(), - SelectorTag::Descendant(name) => query.descendant_name(JsonString::new(&name)), - SelectorTag::ArrayIndexChild(idx) => query.child_index(idx), - SelectorTag::ArrayIndexDescendant(idx) => query.descendant_index(idx) - }; - } - - (result, query.into()) - } - } - - mod correct_strings { - use super::*; - use pretty_assertions::assert_eq; - - proptest! { - #[test] - fn parses_expected_query((input, expected) in any_valid_query()) { - let result = rsonpath_syntax::parse(&input).expect("expected Ok"); - - assert_eq!(expected, result); - } - - #[test] - fn round_trip((_, query) in any_valid_query()) { - let input = query.to_string(); - let result = rsonpath_syntax::parse(&input).expect("expected Ok"); - - assert_eq!(query, result); - } - } - } -}