diff --git a/Cargo.lock b/Cargo.lock index 0b7d0573bf22a..cd4a257361514 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4137,9 +4137,9 @@ dependencies = [ [[package]] name = "jsonbb" -version = "0.1.0" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44376417b2ff0cd879b5c84976fa9e0855c316321b4e0502e33e52963bf84f74" +checksum = "efd95430271266a57cbb8fd31115559c853fcaa5f367d32c4720034f7bd37b7f" dependencies = [ "bytes", "serde", diff --git a/proto/expr.proto b/proto/expr.proto index 7998f2fe8128a..a5956803f2d1a 100644 --- a/proto/expr.proto +++ b/proto/expr.proto @@ -222,6 +222,13 @@ message ExprNode { JSONB_CAT = 605; JSONB_OBJECT = 606; JSONB_PRETTY = 607; + // see SUBTRACT for: + // jsonb - text -> jsonb + // jsonb - text[] -> jsonb + // jsonb - integer -> jsonb + // + // jsonb #- text[] -> jsonb + JSONB_REMOVE_PATH = 613; // Non-pure functions below (> 1000) // ------------------------ diff --git a/src/expr/impl/src/scalar/jsonb_remove.rs b/src/expr/impl/src/scalar/jsonb_remove.rs new file mode 100644 index 0000000000000..3bdb2d29e83a2 --- /dev/null +++ b/src/expr/impl/src/scalar/jsonb_remove.rs @@ -0,0 +1,337 @@ +// Copyright 2023 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+use std::collections::HashSet;
+
+use jsonbb::{Value, ValueRef};
+use risingwave_common::types::{JsonbRef, JsonbVal, ListRef};
+use risingwave_expr::{function, ExprError, Result};
+
+/// Removes a key (and its value) from a JSON object, or matching string value(s) from a JSON array.
+///
+/// For an array, only elements that are JSON strings equal to `key` are removed; elements of
+/// any other type are always kept. Returns an `InvalidParam` error when the left operand is
+/// neither an object nor an array.
+///
+/// Examples:
+///
+/// ```slt
+/// # remove key from object
+/// query T
+/// SELECT '{"a": "b", "c": "d"}'::jsonb - 'a';
+/// ----
+/// {"c": "d"}
+///
+/// # remove matching value from array
+/// query T
+/// SELECT '["a", "b", "c", "b"]'::jsonb - 'b';
+/// ----
+/// ["a", "c"]
+///
+/// query error cannot delete from scalar
+/// SELECT '1'::jsonb - 'b';
+/// ```
+#[function("subtract(jsonb, varchar) -> jsonb")]
+fn jsonb_remove(v: JsonbRef<'_>, key: &str) -> Result<JsonbVal> {
+    match v.into() {
+        // Rebuild the object from every entry whose key differs from `key`.
+        ValueRef::Object(obj) => Ok(JsonbVal::from(Value::object(
+            obj.iter().filter(|(k, _)| *k != key),
+        ))),
+        // Non-string elements yield `None` from `as_str` and therefore never match.
+        ValueRef::Array(arr) => Ok(JsonbVal::from(Value::array(
+            arr.iter().filter(|value| value.as_str() != Some(key)),
+        ))),
+        _ => Err(ExprError::InvalidParam {
+            name: "jsonb",
+            reason: "cannot delete from scalar".into(),
+        }),
+    }
+}
+
+/// Deletes all matching keys or array elements from the left operand.
+///
+/// `NULL` entries in `keys` are skipped. For an array operand, only string elements that
+/// appear in `keys` are removed; elements of any other type are kept. Returns an
+/// `InvalidParam` error when the left operand is neither an object nor an array.
+///
+/// Examples:
+///
+/// ```slt
+/// query T
+/// SELECT '{"a": "b", "c": "d"}'::jsonb - '{a,c}'::text[];
+/// ----
+/// {}
+///
+/// query error cannot delete from scalar
+/// SELECT '1'::jsonb - '{a,c}'::text[];
+/// ```
+#[function("subtract(jsonb, varchar[]) -> jsonb")]
+fn jsonb_remove_keys(v: JsonbRef<'_>, keys: ListRef<'_>) -> Result<JsonbVal> {
+    // Collect the keys once so that each membership test below is a hash lookup.
+    let keys_set: HashSet<&str> = keys.iter().flatten().map(|s| s.into_utf8()).collect();
+
+    match v.into() {
+        ValueRef::Object(obj) => Ok(JsonbVal::from(Value::object(
+            obj.iter().filter(|(k, _)| !keys_set.contains(*k)),
+        ))),
+        ValueRef::Array(arr) => Ok(JsonbVal::from(Value::array(arr.iter().filter(
+            // Keep every non-string element, and every string that is not in the key set.
+            |value| value.as_str().map_or(true, |s| !keys_set.contains(s)),
+        )))),
+        _ => Err(ExprError::InvalidParam {
+            name: "jsonb",
+            reason: "cannot delete from scalar".into(),
+        }),
+    }
+}
+
+/// Deletes the array element with the specified index (negative integers count from the end).
+/// Throws an error if JSON value is not an array.
+///
+/// An index outside the array bounds (in either direction) leaves the value unchanged.
+///
+/// Examples:
+///
+/// ```slt
+/// query T
+/// SELECT '["a", "b"]'::jsonb - 1;
+/// ----
+/// ["a"]
+///
+/// query T
+/// SELECT '["a", "b"]'::jsonb - -1;
+/// ----
+/// ["a"]
+///
+/// query T
+/// SELECT '["a", "b"]'::jsonb - 2;
+/// ----
+/// ["a", "b"]
+///
+/// query T
+/// SELECT '["a", "b"]'::jsonb - -3;
+/// ----
+/// ["a", "b"]
+///
+/// query error cannot delete from scalar
+/// SELECT '1'::jsonb - 1;
+///
+/// query error cannot delete from object using integer index
+/// SELECT '{"a": 1}'::jsonb - 1;
+/// ```
+#[function("subtract(jsonb, int4) -> jsonb")]
+fn jsonb_remove_index(v: JsonbRef<'_>, index: i32) -> Result<JsonbVal> {
+    let array = match v.into() {
+        ValueRef::Array(array) => array,
+        ValueRef::Object(_) => {
+            return Err(ExprError::InvalidParam {
+                name: "jsonb",
+                reason: "cannot delete from object using integer index".into(),
+            })
+        }
+        _ => {
+            return Err(ExprError::InvalidParam {
+                name: "jsonb",
+                reason: "cannot delete from scalar".into(),
+            })
+        }
+    };
+    let len = array.len();
+    // Resolve `index` to a position counted from the front. The arithmetic stays in `usize`
+    // so the array length is never narrowed to `i32`; a negative index that reaches past the
+    // front makes `checked_sub` return `None`.
+    let position = if index >= 0 {
+        Some(index as usize)
+    } else {
+        len.checked_sub(index.unsigned_abs() as usize)
+    };
+    match position {
+        Some(idx) if idx < len => Ok(JsonbVal::from(Value::array(
+            array
+                .iter()
+                .enumerate()
+                .filter(|&(i, _)| i != idx)
+                .map(|(_, elem)| elem),
+        ))),
+        // out of bounds index returns original value
+        _ => Ok(JsonbVal::from(v)),
+    }
+}
+
+/// Deletes the field or array element at the specified path, where path elements can be
+/// either field keys or array indexes.
+/// +/// Examples: +/// +/// ```slt +/// # Basic test case +/// query T +/// SELECT '["a", {"b":1}]'::jsonb #- '{1,b}'; +/// ---- +/// ["a", {}] +/// +/// # Invalid path +/// query error path element at position 1 is null +/// SELECT '["a", {"b":1}]'::jsonb #- array[null]; +/// +/// # Removing non-existent key from an object +/// query T +/// SELECT '{"a": 1, "b": 2}'::jsonb #- '{c}'; +/// ---- +/// {"a": 1, "b": 2} +/// +/// # Removing an existing key from an object +/// query T +/// SELECT '{"a": 1, "b": 2}'::jsonb #- '{a}'; +/// ---- +/// {"b": 2} +/// +/// # Removing an item from an array by positive index +/// query T +/// SELECT '["a", "b", "c"]'::jsonb #- '{1}'; +/// ---- +/// ["a", "c"] +/// +/// # Removing an item from an array by negative index +/// query T +/// SELECT '["a", "b", "c"]'::jsonb #- '{-1}'; +/// ---- +/// ["a", "b"] +/// +/// # Removing a non-existent index from an array +/// query T +/// SELECT '["a", "b", "c"]'::jsonb #- '{3}'; +/// ---- +/// ["a", "b", "c"] +/// +/// # Path element is not an integer for array +/// query error path element at position 1 is not an integer: "a" +/// SELECT '["a", "b", "c"]'::jsonb #- '{a}'; +/// +/// # Path to deeply nested value +/// query T +/// SELECT '{"a": {"b": {"c": [1, 2, 3]}}}'::jsonb #- '{a,b,c,1}'; +/// ---- +/// {"a": {"b": {"c": [1, 3]}}} +/// +/// # Path terminates early (before reaching the final depth of the JSON) +/// query T +/// SELECT '{"a": {"b": {"c": [1, 2, 3]}}}'::jsonb #- '{a}'; +/// ---- +/// {} +/// +/// # Removing non-existent path in nested structure +/// query T +/// SELECT '{"a": {"b": {"c": [1, 2, 3]}}}'::jsonb #- '{a,x}'; +/// ---- +/// {"a": {"b": {"c": [1, 2, 3]}}} +/// +/// # Path is longer than the depth of the JSON structure +/// query T +/// SELECT '{"a": 1}'::jsonb #- '{a,b}'; +/// ---- +/// {"a": 1} +/// +/// # Edge case: Removing root +/// query T +/// SELECT '{"a": 1}'::jsonb #- '{}'; +/// ---- +/// {"a": 1} +/// ``` +#[function("jsonb_remove_path(jsonb, varchar[]) 
-> jsonb")] +fn jsonb_remove_path(v: JsonbRef<'_>, path: ListRef<'_>) -> Result { + let jsonb: ValueRef<'_> = v.into(); + let mut builder = jsonbb::Builder::>::with_capacity(jsonb.capacity()); + jsonbb_remove_path(jsonb, path, 0, &mut builder)?; + Ok(JsonbVal::from(builder.finish())) +} + +// Recursively remove `path[i..]`` from `jsonb` and write the result to `builder`. +fn jsonbb_remove_path( + jsonb: ValueRef<'_>, + path: ListRef<'_>, + i: usize, + builder: &mut jsonbb::Builder, +) -> Result<()> { + if i == path.len() { + // reached end of path + builder.add_value(jsonb); + return Ok(()); + } + match jsonb { + ValueRef::Object(obj) => { + let key = path + .get(i) + .unwrap() + .ok_or_else(|| ExprError::InvalidParam { + name: "path", + reason: format!("path element at position {} is null", i + 1).into(), + })? + .into_utf8(); + if !obj.contains_key(key) { + builder.add_value(jsonb); + return Ok(()); + } + builder.begin_object(); + for (k, v) in obj.iter() { + if k != key { + builder.add_string(k); + builder.add_value(v); + continue; + } + if i != path.len() - 1 { + builder.add_string(k); + // recursively remove path[i+1..] from v + jsonbb_remove_path(v, path, i + 1, builder)?; + } + } + builder.end_object(); + Ok(()) + } + ValueRef::Array(array) => { + let key = path + .get(i) + .unwrap() + .ok_or_else(|| ExprError::InvalidParam { + name: "path", + reason: format!("path element at position {} is null", i + 1).into(), + })? 
+ .into_utf8(); + let idx = key.parse::().map_err(|_| ExprError::InvalidParam { + name: "path", + reason: format!( + "path element at position {} is not an integer: \"{}\"", + i + 1, + key + ) + .into(), + })?; + // out of bounds index returns original value + if idx < -(array.len() as i32) || idx >= (array.len() as i32) { + builder.add_value(jsonb); + return Ok(()); + } + let idx = if idx >= 0 { + idx as usize + } else { + (array.len() as i32 + idx) as usize + }; + builder.begin_array(); + for (j, v) in array.iter().enumerate() { + if j != idx { + builder.add_value(v); + continue; + } + if i != path.len() - 1 { + // recursively remove path[i+1..] from v + jsonbb_remove_path(v, path, i + 1, builder)?; + } + } + builder.end_array(); + Ok(()) + } + _ => { + builder.add_value(jsonb); + Ok(()) + } + } +} diff --git a/src/expr/impl/src/scalar/mod.rs b/src/expr/impl/src/scalar/mod.rs index d9d10e4548aee..d0b3d8e757fc5 100644 --- a/src/expr/impl/src/scalar/mod.rs +++ b/src/expr/impl/src/scalar/mod.rs @@ -46,6 +46,7 @@ mod jsonb_access; mod jsonb_concat; mod jsonb_info; mod jsonb_object; +mod jsonb_remove; mod length; mod lower; mod md5; diff --git a/src/frontend/src/binder/expr/binary_op.rs b/src/frontend/src/binder/expr/binary_op.rs index f7c8a86144fc9..c5640eb6e73f4 100644 --- a/src/frontend/src/binder/expr/binary_op.rs +++ b/src/frontend/src/binder/expr/binary_op.rs @@ -91,6 +91,7 @@ impl Binder { BinaryOperator::PGBitwiseShiftRight => ExprType::BitwiseShiftRight, BinaryOperator::Arrow => ExprType::JsonbAccessInner, BinaryOperator::LongArrow => ExprType::JsonbAccessStr, + BinaryOperator::HashMinus => ExprType::JsonbRemovePath, BinaryOperator::Prefix => ExprType::StartsWith, BinaryOperator::Concat => { let left_type = (!bound_left.is_untyped()).then(|| bound_left.return_type()); diff --git a/src/frontend/src/expr/pure.rs b/src/frontend/src/expr/pure.rs index a3c7abf1ef482..eea09c3b797ce 100644 --- a/src/frontend/src/expr/pure.rs +++ b/src/frontend/src/expr/pure.rs @@ 
-179,6 +179,7 @@ impl ExprVisitor for ImpureAnalyzer { | expr_node::Type::JsonbArrayLength | expr_node::Type::JsonbObject | expr_node::Type::JsonbPretty + | expr_node::Type::JsonbRemovePath | expr_node::Type::IsJson | expr_node::Type::Sind | expr_node::Type::Cosd diff --git a/src/sqlparser/src/ast/operator.rs b/src/sqlparser/src/ast/operator.rs index ad084a59425b0..ffe08614964f1 100644 --- a/src/sqlparser/src/ast/operator.rs +++ b/src/sqlparser/src/ast/operator.rs @@ -99,6 +99,7 @@ pub enum BinaryOperator { LongArrow, HashArrow, HashLongArrow, + HashMinus, PGQualified(Box), } @@ -143,6 +144,7 @@ impl fmt::Display for BinaryOperator { BinaryOperator::LongArrow => "->>", BinaryOperator::HashArrow => "#>", BinaryOperator::HashLongArrow => "#>>", + BinaryOperator::HashMinus => "#-", BinaryOperator::PGQualified(_) => unreachable!(), }) } diff --git a/src/sqlparser/src/parser.rs b/src/sqlparser/src/parser.rs index 5cc094a204268..fd1d09f813f0f 100644 --- a/src/sqlparser/src/parser.rs +++ b/src/sqlparser/src/parser.rs @@ -1397,6 +1397,7 @@ impl Parser { Token::LongArrow => Some(BinaryOperator::LongArrow), Token::HashArrow => Some(BinaryOperator::HashArrow), Token::HashLongArrow => Some(BinaryOperator::HashLongArrow), + Token::HashMinus => Some(BinaryOperator::HashMinus), Token::Word(w) => match w.keyword { Keyword::AND => Some(BinaryOperator::And), Keyword::OR => Some(BinaryOperator::Or), @@ -1735,7 +1736,8 @@ impl Parser { | Token::Arrow | Token::LongArrow | Token::HashArrow - | Token::HashLongArrow => Ok(P::Other), + | Token::HashLongArrow + | Token::HashMinus => Ok(P::Other), Token::Word(w) if w.keyword == Keyword::OPERATOR && self.peek_nth_token(1) == Token::LParen => { diff --git a/src/sqlparser/src/tokenizer.rs b/src/sqlparser/src/tokenizer.rs index d0d1e096f8f73..da6478b5ec125 100644 --- a/src/sqlparser/src/tokenizer.rs +++ b/src/sqlparser/src/tokenizer.rs @@ -164,6 +164,8 @@ pub enum Token { HashArrow, /// `#>>`, extract JSON sub-object at the specified path as 
text in PostgreSQL HashLongArrow, + /// `#-`, delete the field or array element at the specified path of a JSON value in PostgreSQL + HashMinus, } impl fmt::Display for Token { @@ -231,6 +233,7 @@ impl fmt::Display for Token { Token::LongArrow => f.write_str("->>"), Token::HashArrow => f.write_str("#>"), Token::HashLongArrow => f.write_str("#>>"), + Token::HashMinus => f.write_str("#-"), } } } @@ -745,6 +748,7 @@ impl<'a> Tokenizer<'a> { '#' => { chars.next(); // consume the '#' match chars.peek() { + Some('-') => self.consume_and_return(chars, Token::HashMinus), Some('>') => { chars.next(); // consume first '>' match chars.peek() {