Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(expr): support jsonb @>, <@, ?, ?| and ?& operator #13056

Merged
merged 12 commits into from
Oct 30, 2023
10 changes: 10 additions & 0 deletions proto/expr.proto
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,16 @@ message ExprNode {
JSONB_ARRAY_LENGTH = 603;
IS_JSON = 604;
JSONB_CAT = 605;
// jsonb @> jsonb
JSONB_CONTAINS = 608;
// jsonb <@ jsonb
JSONB_CONTAINED_BY = 609;
// jsonb ? text
JSONB_CONTAINS_KEY = 610;
// jsonb ?| text[]
JSONB_CONTAINS_ANY_KEY = 611;
// jsonb ?& text[]
JSONB_CONTAINS_ALL_KEYS = 612;

// Non-pure functions below (> 1000)
// ------------------------
Expand Down
273 changes: 273 additions & 0 deletions src/expr/impl/src/scalar/jsonb_contains.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,273 @@
// Copyright 2023 RisingWave Labs
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use jsonbb::ValueRef;
use risingwave_common::types::{JsonbRef, ListRef};
use risingwave_expr::function;

/// Does the first JSON value contain the second?
///
/// Examples:
///
/// ```slt
/// # Simple scalar/primitive values contain only the identical value:
/// query B
/// SELECT '"foo"'::jsonb @> '"foo"'::jsonb;
/// ----
/// t
///
/// # The array on the right side is contained within the one on the left:
/// query B
/// SELECT '[1, 2, 3]'::jsonb @> '[1, 3]'::jsonb;
/// ----
/// t
///
/// # Order of array elements is not significant, so this is also true:
/// query B
/// SELECT '[1, 2, 3]'::jsonb @> '[3, 1]'::jsonb;
/// ----
/// t
///
/// # Duplicate array elements don't matter either:
/// query B
/// SELECT '[1, 2, 3]'::jsonb @> '[1, 2, 2]'::jsonb;
/// ----
/// t
///
/// # The object with a single pair on the right side is contained
/// # within the object on the left side:
/// query B
/// SELECT '{"product": "PostgreSQL", "version": 9.4, "jsonb": true}'::jsonb @> '{"version": 9.4}'::jsonb;
/// ----
/// t
///
/// # The array on the right side is not considered contained within the
/// # array on the left, even though a similar array is nested within it:
/// query B
/// SELECT '[1, 2, [1, 3]]'::jsonb @> '[1, 3]'::jsonb;
/// ----
/// f
///
/// # But with a layer of nesting, it is contained:
/// query B
/// SELECT '[1, 2, [1, 3]]'::jsonb @> '[[1, 3]]'::jsonb;
/// ----
/// t
///
/// # Similarly, containment is not reported here:
/// query B
/// SELECT '{"foo": {"bar": "baz"}}'::jsonb @> '{"bar": "baz"}'::jsonb;
/// ----
/// f
///
/// # A top-level key and an empty object is contained:
/// query B
/// SELECT '{"foo": {"bar": "baz"}}'::jsonb @> '{"foo": {}}'::jsonb;
/// ----
/// t
///
/// # This array contains the primitive string value:
/// query B
/// SELECT '["foo", "bar"]'::jsonb @> '"bar"'::jsonb;
/// ----
/// t
///
/// # This exception is not reciprocal -- non-containment is reported here:
/// query B
/// SELECT '"bar"'::jsonb @> '["bar"]'::jsonb;
/// ----
/// f
///
/// # Object is not primitive:
/// query B
/// SELECT '[1, {"a":2}]'::jsonb @> '{"a":2}';
/// ----
/// f
///
/// # Array can be nested:
/// query B
/// SELECT '[1, [3, 4]]'::jsonb @> '[[3]]';
/// ----
/// t
///
/// # Recursion shall not include the special rule of array containing primitive:
/// query B
/// SELECT '{"a": [3, 4]}'::jsonb @> '{"a": 3}';
/// ----
/// f
/// ```
#[function("jsonb_contains(jsonb, jsonb) -> boolean")]
fn jsonb_contains(left: JsonbRef<'_>, right: JsonbRef<'_>) -> bool {
jsonbb_contains(left.into(), right.into(), true)
}

/// Containment check (`left @> right`) on raw `jsonbb::ValueRef`s.
///
/// `root` is `true` only for the outermost call; every recursive call passes
/// `false`. It gates the rule that lets an array contain a bare non-container
/// value, which therefore applies at the top level only.
fn jsonbb_contains(left: ValueRef<'_>, right: ValueRef<'_>, root: bool) -> bool {
    match (left, right) {
        // Object vs. object: each entry of `needle` must have a same-named
        // entry in `haystack` whose value contains it.
        (ValueRef::Object(haystack), ValueRef::Object(needle)) => {
            needle.iter().all(|(name, wanted)| match haystack.get(name) {
                Some(found) => jsonbb_contains(found, wanted, false),
                None => false,
            })
        }

        // Array vs. array: each element of `needle` must be contained by at
        // least one element of `haystack`.
        (ValueRef::Array(haystack), ValueRef::Array(needle)) => needle.iter().all(|wanted| {
            haystack
                .iter()
                .any(|candidate| jsonbb_contains(candidate, wanted, false))
        }),

        // An array never contains an object directly.
        (ValueRef::Array(_), ValueRef::Object(_)) => false,

        // Root level only: an array contains a non-container value when one of
        // its elements is equal to it.
        (ValueRef::Array(haystack), scalar) if root => {
            haystack.iter().any(|candidate| candidate == scalar)
        }

        // Every remaining combination falls back to plain equality.
        (lhs, rhs) => lhs == rhs,
    }
}

/// Is the first JSON value contained in the second?
///
/// Examples:
///
/// ```slt
/// query B
/// select '{"b":2}'::jsonb <@ '{"a":1, "b":2}'::jsonb;
/// ----
/// t
/// ```
#[function("jsonb_contained_by(jsonb, jsonb) -> boolean")]
fn jsonb_contained_by(left: JsonbRef<'_>, right: JsonbRef<'_>) -> bool {
    // `left <@ right` is evaluated as `right @> left`: delegate with the
    // operands swapped.
    let (container, candidate) = (right, left);
    jsonb_contains(container, candidate)
}

/// Does the text string exist as a top-level key or array element within the JSON value?
///
/// Examples:
///
/// ```slt
/// # String exists as array element:
/// query B
/// SELECT '["foo", "bar", "baz"]'::jsonb ? 'bar';
/// ----
/// t
///
/// # String exists as object key:
/// query B
/// SELECT '{"foo": "bar"}'::jsonb ? 'foo';
/// ----
/// t
///
/// # Object values are not considered:
/// query B
/// SELECT '{"foo": "bar"}'::jsonb ? 'bar';
/// ----
/// f
///
/// # As with containment, existence must match at the top level:
/// query B
/// SELECT '{"foo": {"bar": "baz"}}'::jsonb ? 'bar';
/// ----
/// f
///
/// # A string is considered to exist if it matches a primitive JSON string:
/// query B
/// SELECT '"foo"'::jsonb ? 'foo';
/// ----
/// t
/// ```
#[function("jsonb_contains_key(jsonb, varchar) -> boolean")]
fn jsonb_contains_key(left: JsonbRef<'_>, key: &str) -> bool {
match left.into() {
ValueRef::Object(object) => object.get(key).is_some(),
ValueRef::Array(array) => array.iter().any(|val| val.as_str() == Some(key)),
ValueRef::String(str) => str == key,
_ => false,
}
}

/// Do any of the strings in the text array exist as top-level keys or array elements?
///
/// Examples:
///
/// ```slt
/// query B
/// select '{"a":1, "b":2, "c":3}'::jsonb ?| array['b', 'd'];
/// ----
/// t
///
/// query B
/// select '["a", "b", "c"]'::jsonb ?| array['b', 'd'];
/// ----
/// t
///
/// query B
/// select '"b"'::jsonb ?| array['b', 'd'];
/// ----
/// t
/// ```
#[function("jsonb_contains_any_key(jsonb, varchar[]) -> boolean")]
fn jsonb_contains_any_key(left: JsonbRef<'_>, keys: ListRef<'_>) -> bool {
let mut keys = keys.iter().flatten().map(|val| val.into_utf8());
match left.into() {
ValueRef::Object(object) => keys.any(|key| object.get(key).is_some()),
ValueRef::Array(array) => keys.any(|key| array.iter().any(|val| val.as_str() == Some(key))),
ValueRef::String(str) => keys.any(|key| str == key),
_ => false,
}
}

/// Do all of the strings in the text array exist as top-level keys or array elements?
///
/// Absent entries of `keys` are ignored. When no key remains to be checked
/// (an empty list), the condition is vacuously satisfied and the result is
/// `true` for every JSON value, including numbers, booleans and `null`. This
/// is the behavior of PostgreSQL's `jsonb_exists_all`.
///
/// Examples:
///
/// ```slt
/// query B
/// select '{"a":1, "b":2, "c":3}'::jsonb ?& array['a', 'b'];
/// ----
/// t
///
/// query B
/// select '["a", "b", "c"]'::jsonb ?& array['a', 'b'];
/// ----
/// t
///
/// query B
/// select '"b"'::jsonb ?& array['b'];
/// ----
/// t
/// ```
#[function("jsonb_contains_all_keys(jsonb, varchar[]) -> boolean")]
fn jsonb_contains_all_keys(left: JsonbRef<'_>, keys: ListRef<'_>) -> bool {
    // `flatten` drops the absent entries of the list; the remaining ones are
    // read as UTF-8 strings.
    let mut keys = keys.iter().flatten().map(|val| val.into_utf8());
    match left.into() {
        // Object: every key must be a top-level field name.
        ValueRef::Object(object) => keys.all(|key| object.get(key).is_some()),
        // Array: every key must equal some top-level string element.
        ValueRef::Array(array) => {
            keys.all(|key| array.iter().any(|val| val.as_str() == Some(key)))
        }
        // Bare string: every key must equal it.
        ValueRef::String(str) => keys.all(|key| str == key),
        // Any other value holds no key, so the answer is `true` only when
        // there is nothing to look for. This keeps the empty-list result
        // consistent with the arms above, where `all` on an empty iterator
        // is `true`.
        _ => keys.next().is_none(),
    }
}
1 change: 1 addition & 0 deletions src/expr/impl/src/scalar/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ mod format_type;
mod int256;
mod jsonb_access;
mod jsonb_concat;
mod jsonb_contains;
mod jsonb_info;
mod length;
mod lower;
Expand Down
5 changes: 5 additions & 0 deletions src/frontend/src/binder/expr/binary_op.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,11 @@ impl Binder {
BinaryOperator::Arrow => ExprType::JsonbAccessInner,
BinaryOperator::LongArrow => ExprType::JsonbAccessStr,
BinaryOperator::Prefix => ExprType::StartsWith,
BinaryOperator::Contains => ExprType::JsonbContains,
BinaryOperator::ContainedBy => ExprType::JsonbContainedBy,
BinaryOperator::ContainsKey => ExprType::JsonbContainsKey,
BinaryOperator::ContainsAnyKey => ExprType::JsonbContainsAnyKey,
BinaryOperator::ContainsAllKeys => ExprType::JsonbContainsAllKeys,
BinaryOperator::Concat => {
let left_type = (!bound_left.is_untyped()).then(|| bound_left.return_type());
let right_type = (!bound_right.is_untyped()).then(|| bound_right.return_type());
Expand Down
5 changes: 5 additions & 0 deletions src/frontend/src/expr/pure.rs
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,11 @@ impl ExprVisitor for ImpureAnalyzer {
| expr_node::Type::JsonbAccessStr
| expr_node::Type::JsonbTypeof
| expr_node::Type::JsonbArrayLength
| expr_node::Type::JsonbContains
| expr_node::Type::JsonbContainedBy
| expr_node::Type::JsonbContainsKey
| expr_node::Type::JsonbContainsAnyKey
| expr_node::Type::JsonbContainsAllKeys
| expr_node::Type::IsJson
| expr_node::Type::Sind
| expr_node::Type::Cosd
Expand Down
10 changes: 10 additions & 0 deletions src/sqlparser/src/ast/operator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,11 @@ pub enum BinaryOperator {
LongArrow,
HashArrow,
HashLongArrow,
Contains,
ContainedBy,
ContainsKey,
ContainsAnyKey,
ContainsAllKeys,
PGQualified(Box<QualifiedOperator>),
}

Expand Down Expand Up @@ -143,6 +148,11 @@ impl fmt::Display for BinaryOperator {
BinaryOperator::LongArrow => "->>",
BinaryOperator::HashArrow => "#>",
BinaryOperator::HashLongArrow => "#>>",
BinaryOperator::Contains => "@>",
BinaryOperator::ContainedBy => "<@",
BinaryOperator::ContainsKey => "?",
BinaryOperator::ContainsAnyKey => "?|",
BinaryOperator::ContainsAllKeys => "?&",
BinaryOperator::PGQualified(_) => unreachable!(),
})
}
Expand Down
12 changes: 11 additions & 1 deletion src/sqlparser/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1397,6 +1397,11 @@ impl Parser {
Token::LongArrow => Some(BinaryOperator::LongArrow),
Token::HashArrow => Some(BinaryOperator::HashArrow),
Token::HashLongArrow => Some(BinaryOperator::HashLongArrow),
Token::AtArrow => Some(BinaryOperator::Contains),
Token::ArrowAt => Some(BinaryOperator::ContainedBy),
Token::QuestionMark => Some(BinaryOperator::ContainsKey),
Token::QuestionMarkPipe => Some(BinaryOperator::ContainsAnyKey),
Token::QuestionMarkAmpersand => Some(BinaryOperator::ContainsAllKeys),
Token::Word(w) => match w.keyword {
Keyword::AND => Some(BinaryOperator::And),
Keyword::OR => Some(BinaryOperator::Or),
Expand Down Expand Up @@ -1735,7 +1740,12 @@ impl Parser {
| Token::Arrow
| Token::LongArrow
| Token::HashArrow
| Token::HashLongArrow => Ok(P::Other),
| Token::HashLongArrow
| Token::AtArrow
| Token::ArrowAt
| Token::QuestionMark
| Token::QuestionMarkPipe
| Token::QuestionMarkAmpersand => Ok(P::Other),
Token::Word(w)
if w.keyword == Keyword::OPERATOR && self.peek_nth_token(1) == Token::LParen =>
{
Expand Down
Loading
Loading