Skip to content

Commit

Permalink
fix: child slice selectors only selecting first matching index
Browse files Browse the repository at this point in the history
- Fixed a bug where the compiler would erronously mark states
with a single slice transition as unitary, even though such
transitions could match more than one index.
  • Loading branch information
V0ldek committed Apr 3, 2024
1 parent dbc6f84 commit 2f68498
Show file tree
Hide file tree
Showing 3 changed files with 97 additions and 4 deletions.
9 changes: 9 additions & 0 deletions crates/rsonpath-lib/src/automaton.rs
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,15 @@ impl ArrayTransitionLabel {
Self::Slice(s) => s.contains(index),
}
}

fn matches_at_most_once(&self) -> bool {
match self {
Self::Index(_) => true,
Self::Slice(slice) => {
slice.step == JsonUInt::ZERO && slice.end.map_or(false, |end| slice.start.as_u64() + 1 >= end.as_u64())
}
}
}
}

impl From<JsonUInt> for ArrayTransitionLabel {
Expand Down
64 changes: 60 additions & 4 deletions crates/rsonpath-lib/src/automaton/minimizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -188,10 +188,7 @@ impl<'q> Minimizer<'q> {
debug!("{id} is rejecting");
attrs = attrs.rejecting();
}
if array_transitions.len() + member_transitions.len() == 1 && fallback == Self::rejecting_state() {
debug!("{id} is unitary");
attrs = attrs.unitary();
}

if self.accepting.contains(fallback.0)
|| array_transitions
.iter()
Expand All @@ -213,6 +210,23 @@ impl<'q> Minimizer<'q> {
attrs = attrs.has_array_transition_to_accepting();
}

// Unitarity check:
// 1. Fallback rejects.
// 2. Only one transition that can match at most one element in a JSON, either:
// a) member transition; or
// b) array transition that matches only one index.
let is_unitary = {
fallback == Self::rejecting_state()
&& ((member_transitions.len() == 1 && array_transitions.is_empty())
|| (array_transitions.len() == 1
&& member_transitions.is_empty()
&& array_transitions[0].label.matches_at_most_once()))
};
if is_unitary {
debug!("{id} is unitary");
attrs = attrs.unitary();
}

attrs.into()
}

Expand Down Expand Up @@ -1260,6 +1274,48 @@ mod tests {
assert_eq!(result, expected);
}

#[test]
fn direct_slice() {
// Query = $[1:3]
let label = SimpleSlice::new(1.into(), Some(3.into()), 1.into());

let nfa = NondeterministicAutomaton {
ordered_states: vec![
NfaState::Direct(nfa::Transition::Array(label.into())),
NfaState::Accepting,
],
};

let mut result = minimize(nfa).unwrap();
make_canonical(&mut result);
let expected = Automaton {
states: vec![
StateTable {
array_transitions: smallvec![],
member_transitions: smallvec![],
fallback_state: State(0),
attributes: StateAttributes::REJECTING,
},
StateTable {
array_transitions: smallvec![ArrayTransition::new(ArrayTransitionLabel::Slice(label), State(2)),],
member_transitions: smallvec![],
fallback_state: State(0),
attributes: StateAttributes::TRANSITIONS_TO_ACCEPTING
| StateAttributes::HAS_ARRAY_TRANSITION
| StateAttributes::HAS_ARRAY_TRANSITION_TO_ACCEPTING,
},
StateTable {
array_transitions: smallvec![],
member_transitions: smallvec![],
fallback_state: State(0),
attributes: StateAttributes::ACCEPTING,
},
],
};

assert_eq!(result, expected);
}

/// DFA creation is unstable - it can result in many different isomorphic automaton structures.
/// This function relabels the states in a canonical way so that they can be compared for equality.
fn make_canonical(dfa: &mut Automaton) {
Expand Down
28 changes: 28 additions & 0 deletions crates/rsonpath-test/documents/toml/lists.toml
Original file line number Diff line number Diff line change
Expand Up @@ -377,3 +377,31 @@ nodes = ['''[
0
]
]''']

[[queries]]
query = "$[1:3]"
description = "select the second and third elements"

[queries.results]
count = 2
spans = [[31, 33], [39, 278]]
nodes = [
'[]',
'''[
[],
[
[
[],
0
],
[
[],
0
],
[
[],
0
]
]
]'''
]

0 comments on commit 2f68498

Please sign in to comment.