Skip to content

Commit

Permalink
feat!: refactor the Input implementors with automatic padding
Browse files Browse the repository at this point in the history
- Padding and alignment are now handled automatically by the input types,
allowing them to work safely without copying the entire input. The overhead is now
limited to the padding, which is at most 256 bytes in total.
- [`BorrowedBytes`](https://docs.rs/rsonpath-lib/0.8.4/rsonpath/input/borrowed/struct.BorrowedBytes.html) is now safe to construct.
- [`OwnedBytes`](https://docs.rs/rsonpath-lib/0.8.4/rsonpath/input/owned/struct.OwnedBytes.html) no longer copies
the entire source on construction.

Ref: #276
  • Loading branch information
V0ldek authored Oct 30, 2023
1 parent fa5445e commit 63056ec
Show file tree
Hide file tree
Showing 53 changed files with 2,835 additions and 1,053 deletions.
12 changes: 8 additions & 4 deletions .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,13 @@ jobs:
# This target needs special setup with MinGW.
needs-mingw: x86
can-run: true
rustflags: "-C link-arg=-fuse-ld=lld --deny warnings"
# lld on Windows uses extreme amounts of memory for debuginfo=2
rustflags: "-C link-arg=-fuse-ld=lld -C debuginfo=1 --deny warnings"
- os: windows-latest
target_triple: i686-pc-windows-msvc
can-run: true
rustflags: "-C link-arg=-fuse-ld=lld --deny warnings"
# lld on Windows uses extreme amounts of memory for debuginfo=2
rustflags: "-C link-arg=-fuse-ld=lld -C debuginfo=1 --deny warnings"
- os: ubuntu-latest
target_triple:
i686-unknown-linux-gnu
Expand All @@ -74,11 +76,13 @@ jobs:
- os: windows-latest
target_triple: x86_64-pc-windows-gnu
can-run: true
rustflags: "-C link-arg=-fuse-ld=lld --deny warnings"
# lld on Windows uses extreme amounts of memory for debuginfo=2
rustflags: "-C link-arg=-fuse-ld=lld -C debuginfo=1 --deny warnings"
- os: windows-latest
target_triple: x86_64-pc-windows-msvc
can-run: true
rustflags: "-C link-arg=-fuse-ld=lld --deny warnings"
# lld on Windows uses extreme amounts of memory for debuginfo=2
rustflags: "-C link-arg=-fuse-ld=lld -C debuginfo=1 --deny warnings"
- os: ubuntu-latest
target_triple: x86_64-unknown-linux-gnu
can-run: true
Expand Down
1 change: 1 addition & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@
"rustdoc",
"RUSTFLAGS",
"rustfmt",
"Seekable",
"SIMD",
"smallvec",
"snaks",
Expand Down
16 changes: 8 additions & 8 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ lto = false
# Release should be used for benching, but not actually distributed.
[profile.release]
lto = "thin"
debug = 2
debug = 1

# This is the profile used for final binaries distributed via package managers.
# It prioritizes performance, and then binary size. We generally don't care about
Expand Down
10 changes: 3 additions & 7 deletions Justfile
Original file line number Diff line number Diff line change
Expand Up @@ -105,11 +105,11 @@ test-engine: (gen-tests)

# Run the input tests on default features.
test-input:
cargo test --test input_implementation -q
cargo test --test input_implementation_tests -q

# Run the query tests on default features.
test-parser:
cargo test --test query_parser -q
cargo test --test query_parser_tests -q

# Run all tests, including real dataset tests, on the feature powerset of the project.
test-full: (gen-tests)
Expand Down Expand Up @@ -241,12 +241,8 @@ commit msg:
[private]
hook-pre-commit:
#!/bin/sh
tmpdiff=$(mktemp -t pre-commit-hook-diff-XXXXXXXX.$$)
just assert-benchmarks-committed
git diff --full-index --binary > $tmpdiff
git stash -q --keep-index
(just verify-fmt && just verify-check); \
git apply --whitespace=nowarn < $tmpdiff}} && git stash drop -q; rm $tmpdiff
(just verify-fmt && just verify-check);

[private]
@hook-post-checkout: checkout-benchmarks
Expand Down
4 changes: 2 additions & 2 deletions book/src/lib/intro.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ _This part of the book is a work in progress._
```rust
# extern crate rsonpath;
use rsonpath::engine::{Compiler, Engine, RsonpathEngine};
use rsonpath::input::OwnedBytes;
use rsonpath::input::BorrowedBytes;
use rsonpath::query::JsonPathQuery;

# fn main() -> Result<(), Box<dyn std::error::Error>> {
Expand All @@ -29,7 +29,7 @@ let contents = r#"
}
}"#;

let input = OwnedBytes::new(&contents)?;
let input = BorrowedBytes::new(contents.as_bytes());
let engine = RsonpathEngine::compile_query(&query)?;
let count = engine.count(&input)?;

Expand Down
2 changes: 1 addition & 1 deletion crates/rsonpath-benchmarks
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Seeds for failure cases proptest has generated in the past. It is
# automatically read and these particular cases re-run before any
# novel cases are generated.
#
# It is recommended to check this file in to source control so that
# everyone who runs the test benefits from these saved cases.
cc 34c43f30d1bf155cf3ee0e13c9776fc11c4dac1f3f19aa602c4c50df5f0e7049 # shrinks to (input, expected) = (",", [Comma(0)])
cc 24036ddd437694148c11fa1c274956e48ed3792ea084f3923deaa4225d030700 # shrinks to (input, expected) = (",", [Comma(0)])
Original file line number Diff line number Diff line change
Expand Up @@ -3,41 +3,47 @@ use crate::{
simd::{self, Simd},
structural::{BracketType, Structural, StructuralIterator},
},
input::BorrowedBytes,
input::Input,
input::OwnedBytes,
result::empty::EmptyRecorder,
FallibleIterator,
};

use super::simd::config_simd;

fn classify_string(json: &str) -> Vec<Structural> {
fn classify_string(json: &str) -> (Vec<Structural>, usize) {
let simd = simd::configure();

config_simd!(simd => |simd| {
let json_string = json.to_owned();
let bytes = OwnedBytes::try_from(json_string).unwrap();
let bytes = BorrowedBytes::new(json_string.as_bytes());
let iter = bytes.iter_blocks(&EmptyRecorder);
let quotes_classifier = simd.classify_quoted_sequences(iter);
let mut structural_classifier = simd.classify_structural_characters(quotes_classifier);
structural_classifier.turn_commas_on(0);
structural_classifier.turn_colons_on(0);

structural_classifier.collect().unwrap()
(structural_classifier.collect().unwrap(), bytes.leading_padding_len())
})
}

/// Replaces every element of `vec` in place with the result of calling
/// `offset(offset)` on it.
///
/// The tests use this to adjust expected structurals by the value that
/// `classify_string` returns alongside the classification result.
fn apply_offset(vec: &mut [Structural], offset: usize) {
    vec.iter_mut()
        .for_each(|structural| *structural = structural.offset(offset));
}

#[test]
fn empty_string() {
let result = classify_string("");
let (result, _) = classify_string("");

assert_eq!(Vec::<Structural>::default(), result);
}

#[test]
fn json() {
let json = r#"{"a": [1, 2, 3], "b": "string", "c": {"d": 42, "e": 17}}"#;
let expected: &[Structural] = &[
let expected: &mut [Structural] = &mut [
Structural::Opening(BracketType::Curly, 0),
Structural::Colon(4),
Structural::Opening(BracketType::Square, 6),
Expand All @@ -56,23 +62,25 @@ fn json() {
Structural::Closing(BracketType::Curly, 55),
];

let result = classify_string(json);
let (result, offset) = classify_string(json);
apply_offset(expected, offset);

assert_eq!(expected, result);
}

#[test]
fn json_with_escapes() {
let json = r#"{"a": "Hello, World!", "b": "\"{Hello, [World]!}\""}"#;
let expected: &[Structural] = &[
let expected: &mut [Structural] = &mut [
Structural::Opening(BracketType::Curly, 0),
Structural::Colon(4),
Structural::Comma(21),
Structural::Colon(26),
Structural::Closing(BracketType::Curly, 51),
];

let result = classify_string(json);
let (result, offset) = classify_string(json);
apply_offset(expected, offset);

assert_eq!(expected, result);
}
Expand All @@ -82,7 +90,7 @@ fn reverse_exclamation_point() {
let wtf = "¡";
let expected: &[Structural] = &[];

let result = classify_string(wtf);
let (result, _) = classify_string(wtf);

assert_eq!(expected, result);
}
Expand All @@ -92,7 +100,7 @@ fn block_boundary() {
use Structural::*;

let wtf = r##",,#;0a#0,#a#0#0aa ;a0 0a,"A"#a~A#0a~A##a0|a0#0aaa~ 0#;A|~|"a"A-|;#0 Aa,,"0","A"A0,,,,,,,,,,,,,,,"a",AA;#|#|a;AAA;a A~;aA;A##A#~a ,,,,,,0^A-AA0aa;- ~0,,,#;A;aA#A#0 a-, a;0aaa0|a 0aA -A#a,,,,"\\","##;
let expected: &[Structural] = &[
let expected: &mut [Structural] = &mut [
Comma(0),
Comma(1),
Comma(8),
Expand Down Expand Up @@ -133,13 +141,14 @@ fn block_boundary() {
Comma(193),
];

let result = classify_string(wtf);
let (result, offset) = classify_string(wtf);
apply_offset(expected, offset);

assert_eq!(expected, result);
}

mod prop_test {
use super::{classify_string, BracketType, Structural};
use super::{apply_offset, classify_string, BracketType, Structural};
use proptest::{self, collection, prelude::*};
use std::fmt::Debug;

Expand Down Expand Up @@ -275,15 +284,17 @@ mod prop_test {

proptest! {
#[test]
fn classifies_correctly_ascii((input, expected) in input_string_ascii()) {
let result = classify_string(&input);
fn classifies_correctly_ascii((input, mut expected) in input_string_ascii()) {
let (result, offset) = classify_string(&input);
apply_offset(&mut expected, offset);

assert_eq!(expected, result);
}

#[test]
fn classifies_correctly_all((input, expected) in input_string_all()) {
let result = classify_string(&input);
fn classifies_correctly_all((input, mut expected) in input_string_all()) {
let (result, offset) = classify_string(&input);
apply_offset(&mut expected, offset);

assert_eq!(expected, result);
}
Expand Down
4 changes: 2 additions & 2 deletions crates/rsonpath-lib/src/classification/memmem.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,13 @@ pub(crate) trait MemmemImpl {
type Classifier<'i, 'b, 'r, I, R>: Memmem<'i, 'b, 'r, I, BLOCK_SIZE>
where
I: Input + 'i,
<I as Input>::BlockIterator<'i, 'r, BLOCK_SIZE, R>: 'b,
<I as Input>::BlockIterator<'i, 'r, R, BLOCK_SIZE>: 'b,
R: InputRecorder<<I as Input>::Block<'i, BLOCK_SIZE>> + 'r,
'i: 'r;

fn memmem<'i, 'b, 'r, I, R>(
input: &'i I,
iter: &'b mut <I as Input>::BlockIterator<'i, 'r, BLOCK_SIZE, R>,
iter: &'b mut <I as Input>::BlockIterator<'i, 'r, R, BLOCK_SIZE>,
) -> Self::Classifier<'i, 'b, 'r, I, R>
where
I: Input,
Expand Down
Loading

0 comments on commit 63056ec

Please sign in to comment.