From 039989f77bea142c9f9767dfa0b85ed388c1695c Mon Sep 17 00:00:00 2001
From: Lin Yihai
Date: Wed, 25 Dec 2024 22:17:22 +0800
Subject: [PATCH] feat: Add `vec_mul` function. (#5205)

---
Review notes (everything between the `---` line above and the first
`diff --git` line is commentary and is not part of the applied change):

* Adds the scalar function `vec_mul` and registers it next to the existing
  scalar_add / scalar_mul vector functions.
* `vec_mul(a, b)` multiplies the two vectors component-wise (nalgebra
  `component_mul`) and returns the product as a binary value.
* The signature lists string and binary types for each of the two arguments.
* A row yields NULL when either argument is NULL for that row (covered by the
  unit test: rows 2 and 3).
* Two vectors of different lengths in the same row are rejected with
  `InvalidFuncArgs` (covered by the unit test).
* The row loop is driven by the length of the first column only; this
  presumably relies on both columns having the same number of rows.
  TODO confirm the caller guarantees this.
* NOTE(review): the doc-comment example quotes its vector literals with double
  quotes while the sqlness cases use single quotes. Confirm which form the SQL
  dialect accepts.

 src/common/function/src/scalars/vector.rs     |   4 +
 .../function/src/scalars/vector/vector_mul.rs | 205 ++++++++++++++++++
 .../common/function/vector/vector.result      |  24 ++
 .../common/function/vector/vector.sql         |   6 +
 4 files changed, 239 insertions(+)
 create mode 100644 src/common/function/src/scalars/vector/vector_mul.rs

diff --git a/src/common/function/src/scalars/vector.rs b/src/common/function/src/scalars/vector.rs
index d462b917af59..b3a6f105ad01 100644
--- a/src/common/function/src/scalars/vector.rs
+++ b/src/common/function/src/scalars/vector.rs
@@ -17,6 +17,7 @@ mod distance;
 pub(crate) mod impl_conv;
 mod scalar_add;
 mod scalar_mul;
+mod vector_mul;
 
 use std::sync::Arc;
 
@@ -38,5 +39,8 @@ impl VectorFunction {
         // scalar calculation
         registry.register(Arc::new(scalar_add::ScalarAddFunction));
         registry.register(Arc::new(scalar_mul::ScalarMulFunction));
+
+        // vector calculation
+        registry.register(Arc::new(vector_mul::VectorMulFunction));
     }
 }
diff --git a/src/common/function/src/scalars/vector/vector_mul.rs b/src/common/function/src/scalars/vector/vector_mul.rs
new file mode 100644
index 000000000000..02e9833623e9
--- /dev/null
+++ b/src/common/function/src/scalars/vector/vector_mul.rs
@@ -0,0 +1,205 @@
+// Copyright 2023 Greptime Team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::borrow::Cow;
+use std::fmt::Display;
+
+use common_query::error::{InvalidFuncArgsSnafu, Result};
+use common_query::prelude::Signature;
+use datatypes::prelude::ConcreteDataType;
+use datatypes::scalars::ScalarVectorBuilder;
+use datatypes::vectors::{BinaryVectorBuilder, MutableVector, VectorRef};
+use nalgebra::DVectorView;
+use snafu::ensure;
+
+use crate::function::{Function, FunctionContext};
+use crate::helper;
+use crate::scalars::vector::impl_conv::{as_veclit, as_veclit_if_const, veclit_to_binlit};
+
+const NAME: &str = "vec_mul";
+
+/// Multiplies corresponding elements of two vectors.
+///
+/// # Example
+///
+/// ```sql
+/// SELECT vec_to_string(vec_mul("[1, 2, 3]", "[1, 2, 3]")) as result;
+///
+/// +---------+
+/// | result  |
+/// +---------+
+/// | [1,4,9] |
+/// +---------+
+///
+/// ```
+#[derive(Debug, Clone, Default)]
+pub struct VectorMulFunction;
+
+impl Function for VectorMulFunction {
+    fn name(&self) -> &str {
+        NAME
+    }
+
+    fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
+        Ok(ConcreteDataType::binary_datatype())
+    }
+
+    fn signature(&self) -> Signature {
+        helper::one_of_sigs2(
+            vec![
+                ConcreteDataType::string_datatype(),
+                ConcreteDataType::binary_datatype(),
+            ],
+            vec![
+                ConcreteDataType::string_datatype(),
+                ConcreteDataType::binary_datatype(),
+            ],
+        )
+    }
+
+    fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result<VectorRef> {
+        ensure!(
+            columns.len() == 2,
+            InvalidFuncArgsSnafu {
+                err_msg: format!(
+                    "The length of the args is not correct, expect exactly two, have: {}",
+                    columns.len()
+                ),
+            }
+        );
+
+        let arg0 = &columns[0];
+        let arg1 = &columns[1];
+
+        let len = arg0.len();
+        let mut result = BinaryVectorBuilder::with_capacity(len);
+        if len == 0 {
+            return Ok(result.to_vector());
+        }
+
+        let arg0_const = as_veclit_if_const(arg0)?;
+        let arg1_const = as_veclit_if_const(arg1)?;
+
+        for i in 0..len {
+            let arg0 = match arg0_const.as_ref() {
+                Some(arg0) => Some(Cow::Borrowed(arg0.as_ref())),
+                None => as_veclit(arg0.get_ref(i))?,
+            };
+
+            let arg1 = match arg1_const.as_ref() {
+                Some(arg1) => Some(Cow::Borrowed(arg1.as_ref())),
+                None => as_veclit(arg1.get_ref(i))?,
+            };
+
+            if let (Some(arg0), Some(arg1)) = (arg0, arg1) {
+                ensure!(
+                    arg0.len() == arg1.len(),
+                    InvalidFuncArgsSnafu {
+                        err_msg: format!(
+                            "The length of the vectors must match for multiplying, have: {} vs {}",
+                            arg0.len(),
+                            arg1.len()
+                        ),
+                    }
+                );
+                let vec0 = DVectorView::from_slice(&arg0, arg0.len());
+                let vec1 = DVectorView::from_slice(&arg1, arg1.len());
+                let vec_res = vec1.component_mul(&vec0);
+
+                let veclit = vec_res.as_slice();
+                let binlit = veclit_to_binlit(veclit);
+                result.push(Some(&binlit));
+            } else {
+                result.push_null();
+            }
+        }
+
+        Ok(result.to_vector())
+    }
+}
+
+impl Display for VectorMulFunction {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{}", NAME.to_ascii_uppercase())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::sync::Arc;
+
+    use common_query::error;
+    use datatypes::vectors::StringVector;
+
+    use super::*;
+
+    #[test]
+    fn test_vector_mul() {
+        let func = VectorMulFunction;
+
+        let vec0 = vec![1.0, 2.0, 3.0];
+        let vec1 = vec![1.0, 1.0];
+        let (len0, len1) = (vec0.len(), vec1.len());
+        let input0 = Arc::new(StringVector::from(vec![Some(format!("{vec0:?}"))]));
+        let input1 = Arc::new(StringVector::from(vec![Some(format!("{vec1:?}"))]));
+
+        let err = func
+            .eval(FunctionContext::default(), &[input0, input1])
+            .unwrap_err();
+
+        match err {
+            error::Error::InvalidFuncArgs { err_msg, .. } => {
+                assert_eq!(
+                    err_msg,
+                    format!(
+                        "The length of the vectors must match for multiplying, have: {} vs {}",
+                        len0, len1
+                    )
+                )
+            }
+            _ => unreachable!(),
+        }
+
+        let input0 = Arc::new(StringVector::from(vec![
+            Some("[1.0,2.0,3.0]".to_string()),
+            Some("[8.0,10.0,12.0]".to_string()),
+            Some("[7.0,8.0,9.0]".to_string()),
+            None,
+        ]));
+
+        let input1 = Arc::new(StringVector::from(vec![
+            Some("[1.0,1.0,1.0]".to_string()),
+            Some("[2.0,2.0,2.0]".to_string()),
+            None,
+            Some("[3.0,3.0,3.0]".to_string()),
+        ]));
+
+        let result = func
+            .eval(FunctionContext::default(), &[input0, input1])
+            .unwrap();
+
+        let result = result.as_ref();
+        assert_eq!(result.len(), 4);
+        assert_eq!(
+            result.get_ref(0).as_binary().unwrap(),
+            Some(veclit_to_binlit(&[1.0, 2.0, 3.0]).as_slice())
+        );
+        assert_eq!(
+            result.get_ref(1).as_binary().unwrap(),
+            Some(veclit_to_binlit(&[16.0, 20.0, 24.0]).as_slice())
+        );
+        assert!(result.get_ref(2).is_null());
+        assert!(result.get_ref(3).is_null());
+    }
+}
diff --git a/tests/cases/standalone/common/function/vector/vector.result b/tests/cases/standalone/common/function/vector/vector.result
index 6f0205982685..0bcca4740350 100644
--- a/tests/cases/standalone/common/function/vector/vector.result
+++ b/tests/cases/standalone/common/function/vector/vector.result
@@ -22,3 +22,27 @@ SELECT vec_to_string(parse_vec('[]'));
 | []                                   |
 +--------------------------------------+
 
+SELECT vec_to_string(vec_mul('[1.0, 2.0]', '[3.0, 4.0]'));
+
++---------------------------------------------------------------+
+| vec_to_string(vec_mul(Utf8("[1.0, 2.0]"),Utf8("[3.0, 4.0]"))) |
++---------------------------------------------------------------+
+| [3,8]                                                         |
++---------------------------------------------------------------+
+
+SELECT vec_to_string(vec_mul(parse_vec('[1.0, 2.0]'), '[3.0, 4.0]'));
+
++--------------------------------------------------------------------------+
+| vec_to_string(vec_mul(parse_vec(Utf8("[1.0, 2.0]")),Utf8("[3.0, 4.0]"))) |
++--------------------------------------------------------------------------+
+| [3,8]                                                                    |
++--------------------------------------------------------------------------+
+
+SELECT vec_to_string(vec_mul('[1.0, 2.0]', parse_vec('[3.0, 4.0]')));
+
++--------------------------------------------------------------------------+
+| vec_to_string(vec_mul(Utf8("[1.0, 2.0]"),parse_vec(Utf8("[3.0, 4.0]")))) |
++--------------------------------------------------------------------------+
+| [3,8]                                                                    |
++--------------------------------------------------------------------------+
+
diff --git a/tests/cases/standalone/common/function/vector/vector.sql b/tests/cases/standalone/common/function/vector/vector.sql
index 97a986916ab1..3f46fa8f2210 100644
--- a/tests/cases/standalone/common/function/vector/vector.sql
+++ b/tests/cases/standalone/common/function/vector/vector.sql
@@ -3,3 +3,9 @@ SELECT vec_to_string(parse_vec('[1.0, 2.0]'));
 SELECT vec_to_string(parse_vec('[1.0, 2.0, 3.0]'));
 
 SELECT vec_to_string(parse_vec('[]'));
+
+SELECT vec_to_string(vec_mul('[1.0, 2.0]', '[3.0, 4.0]'));
+
+SELECT vec_to_string(vec_mul(parse_vec('[1.0, 2.0]'), '[3.0, 4.0]'));
+
+SELECT vec_to_string(vec_mul('[1.0, 2.0]', parse_vec('[3.0, 4.0]')));
\ No newline at end of file