From 0aab68c23bef1fd4d8346df4eefc7dba4517459e Mon Sep 17 00:00:00 2001 From: Zhenchi Date: Wed, 20 Nov 2024 16:42:00 +0800 Subject: [PATCH] feat(vector): add conversion between vector and string (#5029) * feat(vector): add conversion between vector and string Signed-off-by: Zhenchi * fix sqlness Signed-off-by: Zhenchi * address comments Signed-off-by: Zhenchi --------- Signed-off-by: Zhenchi --- src/common/function/src/scalars/vector.rs | 14 +- .../function/src/scalars/vector/convert.rs | 19 ++ .../scalars/vector/convert/parse_vector.rs | 160 +++++++++++++ .../vector/convert/vector_to_string.rs | 139 +++++++++++ src/common/query/src/error.rs | 11 +- src/datatypes/src/types/vector_type.rs | 20 +- src/datatypes/src/vectors/binary.rs | 2 +- src/servers/src/mysql/writer.rs | 9 +- src/servers/src/postgres/types.rs | 32 +-- src/sql/src/statements.rs | 2 +- tests-integration/tests/sql.rs | 10 +- .../common/function/vector/vector.result | 24 ++ .../common/function/vector/vector.sql | 5 + .../function/vector/vector_distance.result | 96 ++++++++ .../function/vector/vector_distance.sql | 23 ++ .../common/types/vector/vector.result | 219 ++++++++++-------- .../standalone/common/types/vector/vector.sql | 41 ++-- 17 files changed, 656 insertions(+), 170 deletions(-) create mode 100644 src/common/function/src/scalars/vector/convert.rs create mode 100644 src/common/function/src/scalars/vector/convert/parse_vector.rs create mode 100644 src/common/function/src/scalars/vector/convert/vector_to_string.rs create mode 100644 tests/cases/standalone/common/function/vector/vector.result create mode 100644 tests/cases/standalone/common/function/vector/vector.sql create mode 100644 tests/cases/standalone/common/function/vector/vector_distance.result create mode 100644 tests/cases/standalone/common/function/vector/vector_distance.sql diff --git a/src/common/function/src/scalars/vector.rs b/src/common/function/src/scalars/vector.rs index 67b812fd09f0..602504ec83ba 100644 --- a/src/common/function/src/scalars/vector.rs +++ b/src/common/function/src/scalars/vector.rs @@ -12,20 +12,24 @@ // See the License for the specific language governing permissions and // limitations under the License. +mod convert; mod distance; use std::sync::Arc; -use distance::{CosDistanceFunction, DotProductFunction, L2SqDistanceFunction}; - use crate::function_registry::FunctionRegistry; pub(crate) struct VectorFunction; impl VectorFunction { pub fn register(registry: &FunctionRegistry) { - registry.register(Arc::new(CosDistanceFunction)); - registry.register(Arc::new(DotProductFunction)); - registry.register(Arc::new(L2SqDistanceFunction)); + // conversion + registry.register(Arc::new(convert::ParseVectorFunction)); + registry.register(Arc::new(convert::VectorToStringFunction)); + + // distance + registry.register(Arc::new(distance::CosDistanceFunction)); + registry.register(Arc::new(distance::DotProductFunction)); + registry.register(Arc::new(distance::L2SqDistanceFunction)); } } diff --git a/src/common/function/src/scalars/vector/convert.rs b/src/common/function/src/scalars/vector/convert.rs new file mode 100644 index 000000000000..e2b9f1b05132 --- /dev/null +++ b/src/common/function/src/scalars/vector/convert.rs @@ -0,0 +1,19 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +mod parse_vector; +mod vector_to_string; + +pub use parse_vector::ParseVectorFunction; +pub use vector_to_string::VectorToStringFunction; diff --git a/src/common/function/src/scalars/vector/convert/parse_vector.rs b/src/common/function/src/scalars/vector/convert/parse_vector.rs new file mode 100644 index 000000000000..ae92a10f444c --- /dev/null +++ b/src/common/function/src/scalars/vector/convert/parse_vector.rs @@ -0,0 +1,160 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::fmt::Display; + +use common_query::error::{InvalidFuncArgsSnafu, InvalidVectorStringSnafu, Result}; +use common_query::prelude::{Signature, Volatility}; +use datatypes::prelude::ConcreteDataType; +use datatypes::scalars::ScalarVectorBuilder; +use datatypes::types::parse_string_to_vector_type_value; +use datatypes::vectors::{BinaryVectorBuilder, MutableVector, VectorRef}; +use snafu::{ensure, ResultExt}; + +use crate::function::{Function, FunctionContext}; + +const NAME: &str = "parse_vec"; + +#[derive(Debug, Clone, Default)] +pub struct ParseVectorFunction; + +impl Function for ParseVectorFunction { + fn name(&self) -> &str { + NAME + } + + fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result { + Ok(ConcreteDataType::binary_datatype()) + } + + fn signature(&self) -> Signature { + Signature::exact( + vec![ConcreteDataType::string_datatype()], + Volatility::Immutable, + ) + } + + fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result { + ensure!( + columns.len() == 1, + InvalidFuncArgsSnafu { + err_msg: format!( + "The length of the args is not correct, expect exactly one, have: {}", + columns.len() + ), + } + ); + + let column = &columns[0]; + let size = column.len(); + + let mut result = BinaryVectorBuilder::with_capacity(size); + for i in 0..size { + let value = column.get(i).as_string(); + if let Some(value) = value { + let res = parse_string_to_vector_type_value(&value, None) + .context(InvalidVectorStringSnafu { vec_str: &value })?; + result.push(Some(&res)); + } else { + result.push_null(); + } + } + + Ok(result.to_vector()) + } +} + +impl Display for ParseVectorFunction { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", NAME.to_ascii_uppercase()) + } +} + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use common_base::bytes::Bytes; + use datatypes::value::Value; + use datatypes::vectors::StringVector; + + use super::*; + + #[test] + fn test_parse_vector() { + let func = ParseVectorFunction; + + let input = Arc::new(StringVector::from(vec![ + Some("[1.0,2.0,3.0]".to_string()), + Some("[4.0,5.0,6.0]".to_string()), + None, + ])); + + let result = func.eval(FunctionContext::default(), &[input]).unwrap(); + + let result = result.as_ref(); + assert_eq!(result.len(), 3); + assert_eq!( + result.get(0), + Value::Binary(Bytes::from( + [1.0f32, 2.0, 3.0] + .iter() + .flat_map(|e| e.to_le_bytes()) + .collect::>() + )) + ); + assert_eq!( + result.get(1), + Value::Binary(Bytes::from( + [4.0f32, 5.0, 6.0] + .iter() + .flat_map(|e| e.to_le_bytes()) + .collect::>() + )) + ); + assert!(result.get(2).is_null()); + } + + #[test] + fn test_parse_vector_error() { + let func = ParseVectorFunction; + + let input = Arc::new(StringVector::from(vec![ + Some("[1.0,2.0,3.0]".to_string()), + Some("[4.0,5.0,6.0]".to_string()), + Some("[7.0,8.0,9.0".to_string()), + ])); + + let result = func.eval(FunctionContext::default(), &[input]); + assert!(result.is_err()); + + let input = Arc::new(StringVector::from(vec![ + Some("[1.0,2.0,3.0]".to_string()), + Some("[4.0,5.0,6.0]".to_string()), + Some("7.0,8.0,9.0]".to_string()), + ])); + + let result = func.eval(FunctionContext::default(), &[input]); + assert!(result.is_err()); + + let input = Arc::new(StringVector::from(vec![ + Some("[1.0,2.0,3.0]".to_string()), + Some("[4.0,5.0,6.0]".to_string()), + Some("[7.0,hello,9.0]".to_string()), + ])); + + let result = func.eval(FunctionContext::default(), &[input]); + assert!(result.is_err()); + } +} diff --git a/src/common/function/src/scalars/vector/convert/vector_to_string.rs b/src/common/function/src/scalars/vector/convert/vector_to_string.rs new file mode 100644 index 000000000000..456b072910ef --- /dev/null +++ b/src/common/function/src/scalars/vector/convert/vector_to_string.rs @@ -0,0 +1,139 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::fmt::Display; + +use common_query::error::{InvalidFuncArgsSnafu, Result}; +use common_query::prelude::{Signature, Volatility}; +use datatypes::prelude::ConcreteDataType; +use datatypes::scalars::ScalarVectorBuilder; +use datatypes::types::vector_type_value_to_string; +use datatypes::value::Value; +use datatypes::vectors::{MutableVector, StringVectorBuilder, VectorRef}; +use snafu::ensure; + +use crate::function::{Function, FunctionContext}; + +const NAME: &str = "vec_to_string"; + +#[derive(Debug, Clone, Default)] +pub struct VectorToStringFunction; + +impl Function for VectorToStringFunction { + fn name(&self) -> &str { + NAME + } + + fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result { + Ok(ConcreteDataType::string_datatype()) + } + + fn signature(&self) -> Signature { + Signature::exact( + vec![ConcreteDataType::binary_datatype()], + Volatility::Immutable, + ) + } + + fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result { + ensure!( + columns.len() == 1, + InvalidFuncArgsSnafu { + err_msg: format!( + "The length of the args is not correct, expect exactly one, have: {}", + columns.len() + ), + } + ); + + let column = &columns[0]; + let size = column.len(); + + let mut result = StringVectorBuilder::with_capacity(size); + for i in 0..size { + let value = column.get(i); + match value { + Value::Binary(bytes) => { + let len = bytes.len(); + if len % std::mem::size_of::() != 0 { + return InvalidFuncArgsSnafu { + err_msg: format!("Invalid binary length of vector: {}", len), + } + .fail(); + } + + let dim = len / std::mem::size_of::(); + // Safety: `dim` is calculated from the length of `bytes` and is guaranteed to be valid + let res = vector_type_value_to_string(&bytes, dim as _).unwrap(); + result.push(Some(&res)); + } + Value::Null => { + result.push_null(); + } + _ => { + return InvalidFuncArgsSnafu { + err_msg: format!("Invalid value type: {:?}", value.data_type()), + } + .fail(); + } + } + } + + Ok(result.to_vector()) + } +} + +impl Display for VectorToStringFunction { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", NAME.to_ascii_uppercase()) + } +} + +#[cfg(test)] +mod tests { + use datatypes::value::Value; + use datatypes::vectors::BinaryVectorBuilder; + + use super::*; + + #[test] + fn test_vector_to_string() { + let func = VectorToStringFunction; + + let mut builder = BinaryVectorBuilder::with_capacity(3); + builder.push(Some( + [1.0f32, 2.0, 3.0] + .iter() + .flat_map(|e| e.to_le_bytes()) + .collect::>() + .as_slice(), + )); + builder.push(Some( + [4.0f32, 5.0, 6.0] + .iter() + .flat_map(|e| e.to_le_bytes()) + .collect::>() + .as_slice(), + )); + builder.push_null(); + let vector = builder.to_vector(); + + let result = func.eval(FunctionContext::default(), &[vector]).unwrap(); + + assert_eq!(result.len(), 3); + assert_eq!(result.get(0), Value::String("[1,2,3]".to_string().into())); + assert_eq!(result.get(1), Value::String("[4,5,6]".to_string().into())); + assert_eq!(result.get(2), Value::Null); + } +} diff --git a/src/common/query/src/error.rs b/src/common/query/src/error.rs index a7a8de07054e..5ef96058a650 100644 --- a/src/common/query/src/error.rs +++ b/src/common/query/src/error.rs @@ -245,6 +245,14 @@ pub enum Error { #[snafu(implicit)] location: Location, }, + + #[snafu(display("Invalid vector string: {}", vec_str))] + InvalidVectorString { + vec_str: String, + source: DataTypeError, + #[snafu(implicit)] + location: Location, + }, } pub type Result = std::result::Result; @@ -273,7 +281,8 @@ impl ErrorExt for Error { | Error::IntoVector { source, .. } | Error::FromScalarValue { source, .. } | Error::ConvertArrowSchema { source, .. } - | Error::FromArrowArray { source, .. } => source.status_code(), + | Error::FromArrowArray { source, .. } + | Error::InvalidVectorString { source, .. } => source.status_code(), Error::MissingTableMutationHandler { .. } | Error::MissingProcedureServiceHandler { .. } diff --git a/src/datatypes/src/types/vector_type.rs b/src/datatypes/src/types/vector_type.rs index 88b6c2787a0f..77f85ba897e9 100644 --- a/src/datatypes/src/types/vector_type.rs +++ b/src/datatypes/src/types/vector_type.rs @@ -102,7 +102,7 @@ pub fn vector_type_value_to_string(val: &[u8], dim: u32) -> Result { /// Parses a string to a vector type value /// Valid input format: "[1.0,2.0,3.0]", "[1.0, 2.0, 3.0]" -pub fn parse_string_to_vector_type_value(s: &str, dim: u32) -> Result> { +pub fn parse_string_to_vector_type_value(s: &str, dim: Option) -> Result> { // Trim the brackets let trimmed = s.trim(); if !trimmed.starts_with('[') || !trimmed.ends_with(']') { @@ -115,7 +115,7 @@ pub fn parse_string_to_vector_type_value(s: &str, dim: u32) -> Result> { let content = trimmed[1..trimmed.len() - 1].trim(); if content.is_empty() { - if dim != 0 { + if dim.map_or(false, |d| d != 0) { return InvalidVectorSnafu { msg: format!("Failed to parse {s} to Vector value: wrong dimension"), } @@ -139,7 +139,7 @@ pub fn parse_string_to_vector_type_value(s: &str, dim: u32) -> Result> { .collect::>>()?; // Check dimension - if elements.len() != dim as usize { + if dim.map_or(false, |d| d as usize != elements.len()) { return InvalidVectorSnafu { msg: format!("Failed to parse {s} to Vector value: wrong dimension"), } @@ -180,7 +180,7 @@ mod tests { ]; for (s, expected) in cases.iter() { - let val = parse_string_to_vector_type_value(s, dim).unwrap(); + let val = parse_string_to_vector_type_value(s, Some(dim)).unwrap(); let s = vector_type_value_to_string(&val, dim).unwrap(); assert_eq!(s, *expected); } @@ -188,7 +188,7 @@ mod tests { let dim = 0; let cases = [("[]", "[]"), ("[ ]", "[]"), ("[ ]", "[]")]; for (s, expected) in cases.iter() { - let val = parse_string_to_vector_type_value(s, dim).unwrap(); + let val = parse_string_to_vector_type_value(s, Some(dim)).unwrap(); let s = vector_type_value_to_string(&val, dim).unwrap(); assert_eq!(s, *expected); } @@ -211,15 +211,15 @@ mod tests { fn test_parse_string_to_vector_type_value_not_properly_enclosed_in_brackets() { let dim = 3; let s = "1.0,2.0,3.0"; - let res = parse_string_to_vector_type_value(s, dim); + let res = parse_string_to_vector_type_value(s, Some(dim)); assert!(res.is_err()); let s = "[1.0,2.0,3.0"; - let res = parse_string_to_vector_type_value(s, dim); + let res = parse_string_to_vector_type_value(s, Some(dim)); assert!(res.is_err()); let s = "1.0,2.0,3.0]"; - let res = parse_string_to_vector_type_value(s, dim); + let res = parse_string_to_vector_type_value(s, Some(dim)); assert!(res.is_err()); } @@ -227,7 +227,7 @@ mod tests { fn test_parse_string_to_vector_type_value_wrong_dimension() { let dim = 3; let s = "[1.0,2.0]"; - let res = parse_string_to_vector_type_value(s, dim); + let res = parse_string_to_vector_type_value(s, Some(dim)); assert!(res.is_err()); } @@ -235,7 +235,7 @@ mod tests { fn test_parse_string_to_vector_type_value_elements_are_not_all_float32() { let dim = 3; let s = "[1.0,2.0,ah]"; - let res = parse_string_to_vector_type_value(s, dim); + let res = parse_string_to_vector_type_value(s, Some(dim)); assert!(res.is_err()); } } diff --git a/src/datatypes/src/vectors/binary.rs b/src/datatypes/src/vectors/binary.rs index c4e8349714f8..45aa40d5e8e5 100644 --- a/src/datatypes/src/vectors/binary.rs +++ b/src/datatypes/src/vectors/binary.rs @@ -80,7 +80,7 @@ impl BinaryVector { let v = if let Some(binary) = binary { let bytes_size = dim as usize * std::mem::size_of::(); if let Ok(s) = String::from_utf8(binary.to_vec()) { - let v = parse_string_to_vector_type_value(&s, dim)?; + let v = parse_string_to_vector_type_value(&s, Some(dim))?; Some(v) } else if binary.len() == dim as usize * std::mem::size_of::() { Some(binary.to_vec()) diff --git a/src/servers/src/mysql/writer.rs b/src/servers/src/mysql/writer.rs index 90faa171debc..79a1474db825 100644 --- a/src/servers/src/mysql/writer.rs +++ b/src/servers/src/mysql/writer.rs @@ -21,7 +21,7 @@ use common_recordbatch::{RecordBatch, SendableRecordBatchStream}; use common_telemetry::{debug, error}; use datatypes::prelude::{ConcreteDataType, Value}; use datatypes::schema::SchemaRef; -use datatypes::types::{json_type_value_to_string, vector_type_value_to_string}; +use datatypes::types::json_type_value_to_string; use futures::StreamExt; use opensrv_mysql::{ Column, ColumnFlags, ColumnType, ErrorKind, OkResponse, QueryResultWriter, RowWriter, @@ -217,11 +217,6 @@ impl<'a, W: AsyncWrite + Unpin> MysqlResultWriter<'a, W> { .context(ConvertSqlValueSnafu)?; row_writer.write_col(s)?; } - ConcreteDataType::Vector(d) => { - let s = vector_type_value_to_string(&v, d.dim) - .context(ConvertSqlValueSnafu)?; - row_writer.write_col(s)?; - } _ => { row_writer.write_col(v.deref())?; } @@ -303,7 +298,7 @@ pub(crate) fn create_mysql_column( ConcreteDataType::Duration(_) => Ok(ColumnType::MYSQL_TYPE_TIME), ConcreteDataType::Decimal128(_) => Ok(ColumnType::MYSQL_TYPE_DECIMAL), ConcreteDataType::Json(_) => Ok(ColumnType::MYSQL_TYPE_JSON), - ConcreteDataType::Vector(_) => Ok(ColumnType::MYSQL_TYPE_STRING), + ConcreteDataType::Vector(_) => Ok(ColumnType::MYSQL_TYPE_BLOB), _ => error::UnsupportedDataTypeSnafu { data_type, reason: "not implemented", diff --git a/src/servers/src/postgres/types.rs b/src/servers/src/postgres/types.rs index b5a331976644..2a413ed00dfa 100644 --- a/src/servers/src/postgres/types.rs +++ b/src/servers/src/postgres/types.rs @@ -27,9 +27,7 @@ use datafusion_expr::LogicalPlan; use datatypes::arrow::datatypes::DataType as ArrowDataType; use datatypes::prelude::{ConcreteDataType, Value}; use datatypes::schema::Schema; -use datatypes::types::{ - json_type_value_to_string, vector_type_value_to_string, IntervalType, TimestampType, -}; +use datatypes::types::{json_type_value_to_string, IntervalType, TimestampType}; use datatypes::value::ListValue; use pgwire::api::portal::{Format, Portal}; use pgwire::api::results::{DataRowEncoder, FieldInfo}; @@ -178,7 +176,7 @@ fn encode_array( .collect::>>>()?; builder.encode_field(&array) } - &ConcreteDataType::Binary(_) => { + &ConcreteDataType::Binary(_) | &ConcreteDataType::Vector(_) => { let bytea_output = query_ctx.configuration_parameter().postgres_bytea_output(); match *bytea_output { @@ -370,24 +368,6 @@ fn encode_array( .collect::>>>()?; builder.encode_field(&array) } - &ConcreteDataType::Vector(d) => { - let array = value_list - .items() - .iter() - .map(|v| match v { - Value::Null => Ok(None), - Value::Binary(v) => { - let s = vector_type_value_to_string(v, d.dim) - .map_err(|e| PgWireError::ApiError(Box::new(e)))?; - Ok(Some(s)) - } - _ => Err(PgWireError::ApiError(Box::new(Error::Internal { - err_msg: format!("Invalid list item type, find {v:?}, expected vector",), - }))), - }) - .collect::>>>()?; - builder.encode_field(&array) - } _ => Err(PgWireError::ApiError(Box::new(Error::Internal { err_msg: format!( "cannot write array type {:?} in postgres protocol: unimplemented", @@ -423,11 +403,6 @@ pub(super) fn encode_value( .map_err(|e| PgWireError::ApiError(Box::new(e)))?; builder.encode_field(&s) } - ConcreteDataType::Vector(d) => { - let s = vector_type_value_to_string(v, d.dim) - .map_err(|e| PgWireError::ApiError(Box::new(e)))?; - builder.encode_field(&s) - } _ => { let bytea_output = query_ctx.configuration_parameter().postgres_bytea_output(); match *bytea_output { @@ -503,7 +478,7 @@ pub(super) fn type_gt_to_pg(origin: &ConcreteDataType) -> Result { &ConcreteDataType::Int64(_) | &ConcreteDataType::UInt64(_) => Ok(Type::INT8), &ConcreteDataType::Float32(_) => Ok(Type::FLOAT4), &ConcreteDataType::Float64(_) => Ok(Type::FLOAT8), - &ConcreteDataType::Binary(_) => Ok(Type::BYTEA), + &ConcreteDataType::Binary(_) | &ConcreteDataType::Vector(_) => Ok(Type::BYTEA), &ConcreteDataType::String(_) => Ok(Type::VARCHAR), &ConcreteDataType::Date(_) => Ok(Type::DATE), &ConcreteDataType::DateTime(_) | &ConcreteDataType::Timestamp(_) => Ok(Type::TIMESTAMP), @@ -546,7 +521,6 @@ pub(super) fn type_gt_to_pg(origin: &ConcreteDataType) -> Result { } .fail() } - &ConcreteDataType::Vector(_) => Ok(Type::FLOAT4_ARRAY), } } diff --git a/src/sql/src/statements.rs b/src/sql/src/statements.rs index bb0844a46928..3e1e505a9b1b 100644 --- a/src/sql/src/statements.rs +++ b/src/sql/src/statements.rs @@ -133,7 +133,7 @@ fn parse_string_to_value( Ok(Value::Binary(v.into())) } ConcreteDataType::Vector(d) => { - let v = parse_string_to_vector_type_value(&s, d.dim).context(DatatypeSnafu)?; + let v = parse_string_to_vector_type_value(&s, Some(d.dim)).context(DatatypeSnafu)?; Ok(Value::Binary(v.into())) } _ => { diff --git a/tests-integration/tests/sql.rs b/tests-integration/tests/sql.rs index 4e16beb052d8..f15e3743256d 100644 --- a/tests-integration/tests/sql.rs +++ b/tests-integration/tests/sql.rs @@ -215,7 +215,7 @@ pub async fn test_mysql_crud(store_type: StorageType) { let dt: DateTime = row.get("dt"); let bytes: Vec = row.get("b"); let json: serde_json::Value = row.get("j"); - let vector: String = row.get("v"); + let vector: Vec = row.get("v"); assert_eq!(ret, i as i64); let expected_d = NaiveDate::from_yo_opt(2015, 100).unwrap(); assert_eq!(expected_d, d); @@ -242,7 +242,13 @@ pub async fn test_mysql_crud(store_type: StorageType) { } }); assert_eq!(json, expected_j); - assert_eq!(vector, "[1,2,3]"); + assert_eq!( + vector, + [1.0f32, 2.0, 3.0] + .iter() + .flat_map(|x| x.to_le_bytes()) + .collect::>() + ); } let rows = sqlx::query("select i from demo where i=?") diff --git a/tests/cases/standalone/common/function/vector/vector.result b/tests/cases/standalone/common/function/vector/vector.result new file mode 100644 index 000000000000..6f0205982685 --- /dev/null +++ b/tests/cases/standalone/common/function/vector/vector.result @@ -0,0 +1,24 @@ +SELECT vec_to_string(parse_vec('[1.0, 2.0]')); + ++----------------------------------------------+ +| vec_to_string(parse_vec(Utf8("[1.0, 2.0]"))) | ++----------------------------------------------+ +| [1,2] | ++----------------------------------------------+ + +SELECT vec_to_string(parse_vec('[1.0, 2.0, 3.0]')); + ++---------------------------------------------------+ +| vec_to_string(parse_vec(Utf8("[1.0, 2.0, 3.0]"))) | ++---------------------------------------------------+ +| [1,2,3] | ++---------------------------------------------------+ + +SELECT vec_to_string(parse_vec('[]')); + ++--------------------------------------+ +| vec_to_string(parse_vec(Utf8("[]"))) | ++--------------------------------------+ +| [] | ++--------------------------------------+ + diff --git a/tests/cases/standalone/common/function/vector/vector.sql b/tests/cases/standalone/common/function/vector/vector.sql new file mode 100644 index 000000000000..97a986916ab1 --- /dev/null +++ b/tests/cases/standalone/common/function/vector/vector.sql @@ -0,0 +1,5 @@ +SELECT vec_to_string(parse_vec('[1.0, 2.0]')); + +SELECT vec_to_string(parse_vec('[1.0, 2.0, 3.0]')); + +SELECT vec_to_string(parse_vec('[]')); diff --git a/tests/cases/standalone/common/function/vector/vector_distance.result b/tests/cases/standalone/common/function/vector/vector_distance.result new file mode 100644 index 000000000000..ac6784945d5e --- /dev/null +++ b/tests/cases/standalone/common/function/vector/vector_distance.result @@ -0,0 +1,96 @@ +SELECT vec_cos_distance('[1.0, 2.0]', '[0.0, 0.0]'); + ++---------------------------------------------------------+ +| vec_cos_distance(Utf8("[1.0, 2.0]"),Utf8("[0.0, 0.0]")) | ++---------------------------------------------------------+ +| 1.0 | ++---------------------------------------------------------+ + +SELECT vec_cos_distance(parse_vec('[1.0, 2.0]'), '[0.0, 0.0]'); + ++--------------------------------------------------------------------+ +| vec_cos_distance(parse_vec(Utf8("[1.0, 2.0]")),Utf8("[0.0, 0.0]")) | ++--------------------------------------------------------------------+ +| 1.0 | ++--------------------------------------------------------------------+ + +SELECT vec_cos_distance('[1.0, 2.0]', parse_vec('[0.0, 0.0]')); + ++--------------------------------------------------------------------+ +| vec_cos_distance(Utf8("[1.0, 2.0]"),parse_vec(Utf8("[0.0, 0.0]"))) | ++--------------------------------------------------------------------+ +| 1.0 | ++--------------------------------------------------------------------+ + +SELECT vec_cos_distance(parse_vec('[1.0, 2.0]'), parse_vec('[0.0, 0.0]')); + ++-------------------------------------------------------------------------------+ +| vec_cos_distance(parse_vec(Utf8("[1.0, 2.0]")),parse_vec(Utf8("[0.0, 0.0]"))) | ++-------------------------------------------------------------------------------+ +| 1.0 | ++-------------------------------------------------------------------------------+ + +SELECT vec_l2sq_distance('[1.0, 2.0]', '[0.0, 0.0]'); + ++----------------------------------------------------------+ +| vec_l2sq_distance(Utf8("[1.0, 2.0]"),Utf8("[0.0, 0.0]")) | ++----------------------------------------------------------+ +| 5.0 | ++----------------------------------------------------------+ + +SELECT vec_l2sq_distance(parse_vec('[1.0, 2.0]'), '[0.0, 0.0]'); + ++---------------------------------------------------------------------+ +| vec_l2sq_distance(parse_vec(Utf8("[1.0, 2.0]")),Utf8("[0.0, 0.0]")) | ++---------------------------------------------------------------------+ +| 5.0 | ++---------------------------------------------------------------------+ + +SELECT vec_l2sq_distance('[1.0, 2.0]', parse_vec('[0.0, 0.0]')); + ++---------------------------------------------------------------------+ +| vec_l2sq_distance(Utf8("[1.0, 2.0]"),parse_vec(Utf8("[0.0, 0.0]"))) | ++---------------------------------------------------------------------+ +| 5.0 | ++---------------------------------------------------------------------+ + +SELECT vec_l2sq_distance(parse_vec('[1.0, 2.0]'), parse_vec('[0.0, 0.0]')); + ++--------------------------------------------------------------------------------+ +| vec_l2sq_distance(parse_vec(Utf8("[1.0, 2.0]")),parse_vec(Utf8("[0.0, 0.0]"))) | ++--------------------------------------------------------------------------------+ +| 5.0 | ++--------------------------------------------------------------------------------+ + +SELECT vec_dot_product('[1.0, 2.0]', '[0.0, 0.0]'); + ++--------------------------------------------------------+ +| vec_dot_product(Utf8("[1.0, 2.0]"),Utf8("[0.0, 0.0]")) | ++--------------------------------------------------------+ +| 0.0 | ++--------------------------------------------------------+ + +SELECT vec_dot_product(parse_vec('[1.0, 2.0]'), '[0.0, 0.0]'); + ++-------------------------------------------------------------------+ +| vec_dot_product(parse_vec(Utf8("[1.0, 2.0]")),Utf8("[0.0, 0.0]")) | ++-------------------------------------------------------------------+ +| 0.0 | ++-------------------------------------------------------------------+ + +SELECT vec_dot_product('[1.0, 2.0]', parse_vec('[0.0, 0.0]')); + ++-------------------------------------------------------------------+ +| vec_dot_product(Utf8("[1.0, 2.0]"),parse_vec(Utf8("[0.0, 0.0]"))) | ++-------------------------------------------------------------------+ +| 0.0 | ++-------------------------------------------------------------------+ + +SELECT vec_dot_product(parse_vec('[1.0, 2.0]'), parse_vec('[0.0, 0.0]')); + ++------------------------------------------------------------------------------+ +| vec_dot_product(parse_vec(Utf8("[1.0, 2.0]")),parse_vec(Utf8("[0.0, 0.0]"))) | ++------------------------------------------------------------------------------+ +| 0.0 | ++------------------------------------------------------------------------------+ + diff --git a/tests/cases/standalone/common/function/vector/vector_distance.sql b/tests/cases/standalone/common/function/vector/vector_distance.sql new file mode 100644 index 000000000000..9c048b4defd6 --- /dev/null +++ b/tests/cases/standalone/common/function/vector/vector_distance.sql @@ -0,0 +1,23 @@ +SELECT vec_cos_distance('[1.0, 2.0]', '[0.0, 0.0]'); + +SELECT vec_cos_distance(parse_vec('[1.0, 2.0]'), '[0.0, 0.0]'); + +SELECT vec_cos_distance('[1.0, 2.0]', parse_vec('[0.0, 0.0]')); + +SELECT vec_cos_distance(parse_vec('[1.0, 2.0]'), parse_vec('[0.0, 0.0]')); + +SELECT vec_l2sq_distance('[1.0, 2.0]', '[0.0, 0.0]'); + +SELECT vec_l2sq_distance(parse_vec('[1.0, 2.0]'), '[0.0, 0.0]'); + +SELECT vec_l2sq_distance('[1.0, 2.0]', parse_vec('[0.0, 0.0]')); + +SELECT vec_l2sq_distance(parse_vec('[1.0, 2.0]'), parse_vec('[0.0, 0.0]')); + +SELECT vec_dot_product('[1.0, 2.0]', '[0.0, 0.0]'); + +SELECT vec_dot_product(parse_vec('[1.0, 2.0]'), '[0.0, 0.0]'); + +SELECT vec_dot_product('[1.0, 2.0]', parse_vec('[0.0, 0.0]')); + +SELECT vec_dot_product(parse_vec('[1.0, 2.0]'), parse_vec('[0.0, 0.0]')); diff --git a/tests/cases/standalone/common/types/vector/vector.result b/tests/cases/standalone/common/types/vector/vector.result index 583bd1c293a2..3d40f4f8b289 100644 --- a/tests/cases/standalone/common/types/vector/vector.result +++ b/tests/cases/standalone/common/types/vector/vector.result @@ -2,34 +2,34 @@ CREATE TABLE t (ts TIMESTAMP TIME INDEX, v VECTOR(3)); Affected Rows: 0 +-- Invert string INSERT INTO t VALUES -(1, "[1.0, 2.0, 3.0]"), -(2, "[4.0, 5.0, 6.0]"), -(3, "[7.0, 8.0, 9.0]"); +(1, '[1.0, 2.0, 3.0]'), +(2, '[4.0, 5.0, 6.0]'), +(3, '[7.0, 8.0, 9.0]'); Affected Rows: 3 --- SQLNESS PROTOCOL MYSQL -SELECT * FROM t; +-- Invert vector value +INSERT INTO t VALUES +(4, parse_vec('[1.0, 2.0, 3.0]')), +(5, parse_vec('[4.0, 5.0, 6.0]')), +(6, parse_vec('[7.0, 8.0, 9.0]')); -+----------------------------+---------+ -| ts | v | -+----------------------------+---------+ -| 1970-01-01 00:00:00.001000 | [1,2,3] | -| 1970-01-01 00:00:00.002000 | [4,5,6] | -| 1970-01-01 00:00:00.003000 | [7,8,9] | -+----------------------------+---------+ +Affected Rows: 3 --- SQLNESS PROTOCOL POSTGRES -SELECT * FROM t; +SELECT ts, v, vec_to_string(v) FROM t; -+----------------------------+-----------+ -| ts | v | -+----------------------------+-----------+ -| 1970-01-01 00:00:00.001000 | "[1,2,3]" | -| 1970-01-01 00:00:00.002000 | "[4,5,6]" | -| 1970-01-01 00:00:00.003000 | "[7,8,9]" | -+----------------------------+-----------+ ++-------------------------+--------------------------+--------------------+ +| ts | v | vec_to_string(t.v) | ++-------------------------+--------------------------+--------------------+ +| 1970-01-01T00:00:00.001 | 0000803f0000004000004040 | [1,2,3] | +| 1970-01-01T00:00:00.002 | 000080400000a0400000c040 | [4,5,6] | +| 1970-01-01T00:00:00.003 | 0000e0400000004100001041 | [7,8,9] | +| 1970-01-01T00:00:00.004 | 0000803f0000004000004040 | [1,2,3] | +| 1970-01-01T00:00:00.005 | 000080400000a0400000c040 | [4,5,6] | +| 1970-01-01T00:00:00.006 | 0000e0400000004100001041 | [7,8,9] | ++-------------------------+--------------------------+--------------------+ SELECT round(vec_cos_distance(v, '[0.0, 0.0, 0.0]'), 2) FROM t; @@ -39,17 +39,23 @@ SELECT round(vec_cos_distance(v, '[0.0, 0.0, 0.0]'), 2) FROM t; | 1.0 | | 1.0 | | 1.0 | +| 1.0 | +| 1.0 | +| 1.0 | +---------------------------------------------------------------+ -SELECT *, round(vec_cos_distance(v, '[0.0, 0.0, 0.0]'), 2) as d FROM t ORDER BY d; +SELECT ts, v, vec_to_string(v), round(vec_cos_distance(v, '[0.0, 0.0, 0.0]'), 2) as d FROM t ORDER BY d, ts; -+-------------------------+--------------------------+-----+ -| ts | v | d | -+-------------------------+--------------------------+-----+ -| 1970-01-01T00:00:00.001 | 0000803f0000004000004040 | 1.0 | -| 1970-01-01T00:00:00.002 | 000080400000a0400000c040 | 1.0 | -| 1970-01-01T00:00:00.003 | 0000e0400000004100001041 | 1.0 | -+-------------------------+--------------------------+-----+ ++-------------------------+--------------------------+--------------------+-----+ +| ts | v | vec_to_string(t.v) | d | ++-------------------------+--------------------------+--------------------+-----+ +| 1970-01-01T00:00:00.001 | 0000803f0000004000004040 | [1,2,3] | 1.0 | +| 1970-01-01T00:00:00.002 | 000080400000a0400000c040 | [4,5,6] | 1.0 | +| 1970-01-01T00:00:00.003 | 0000e0400000004100001041 | [7,8,9] | 1.0 | +| 1970-01-01T00:00:00.004 | 0000803f0000004000004040 | [1,2,3] | 1.0 | +| 1970-01-01T00:00:00.005 | 000080400000a0400000c040 | [4,5,6] | 1.0 | +| 1970-01-01T00:00:00.006 | 0000e0400000004100001041 | [7,8,9] | 1.0 | ++-------------------------+--------------------------+--------------------+-----+ SELECT round(vec_cos_distance('[7.0, 8.0, 9.0]', v), 2) FROM t; @@ -59,17 +65,23 @@ SELECT round(vec_cos_distance('[7.0, 8.0, 9.0]', v), 2) FROM t; | 0.04 | | 0.0 | | 0.0 | +| 0.04 | +| 0.0 | +| 0.0 | +---------------------------------------------------------------+ -SELECT *, round(vec_cos_distance('[7.0, 8.0, 9.0]', v), 2) as d FROM t ORDER BY d; +SELECT ts, v, vec_to_string(v), round(vec_cos_distance('[7.0, 8.0, 9.0]', v), 2) as d FROM t ORDER BY d, ts; -+-------------------------+--------------------------+------+ -| ts | v | d | -+-------------------------+--------------------------+------+ -| 1970-01-01T00:00:00.002 | 000080400000a0400000c040 | 0.0 | -| 1970-01-01T00:00:00.003 | 0000e0400000004100001041 | 0.0 | -| 1970-01-01T00:00:00.001 | 0000803f0000004000004040 | 0.04 | -+-------------------------+--------------------------+------+ ++-------------------------+--------------------------+--------------------+------+ +| ts | v | vec_to_string(t.v) | d | ++-------------------------+--------------------------+--------------------+------+ +| 1970-01-01T00:00:00.002 | 000080400000a0400000c040 | [4,5,6] | 0.0 | +| 1970-01-01T00:00:00.003 | 0000e0400000004100001041 | [7,8,9] | 0.0 | +| 1970-01-01T00:00:00.005 | 000080400000a0400000c040 | [4,5,6] | 0.0 | +| 1970-01-01T00:00:00.006 | 0000e0400000004100001041 | [7,8,9] | 0.0 | +| 1970-01-01T00:00:00.001 | 0000803f0000004000004040 | [1,2,3] | 0.04 | +| 1970-01-01T00:00:00.004 | 0000803f0000004000004040 | [1,2,3] | 0.04 | ++-------------------------+--------------------------+--------------------+------+ SELECT round(vec_cos_distance(v, v), 2) FROM t; @@ -79,6 +91,9 @@ SELECT round(vec_cos_distance(v, v), 2) FROM t; | 0.0 | | 0.0 | | 0.0 | +| 0.0 | +| 0.0 | +| 0.0 | +-------------------------------------------+ -- Unexpected dimension -- @@ -99,17 +114,23 @@ SELECT round(vec_l2sq_distance(v, '[0.0, 0.0, 0.0]'), 2) FROM t; | 14.0 | | 77.0 | | 194.0 | +| 14.0 | +| 77.0 | +| 194.0 | +----------------------------------------------------------------+ -SELECT *, round(vec_l2sq_distance(v, '[0.0, 0.0, 0.0]'), 2) as d FROM t ORDER BY d; +SELECT ts, v, vec_to_string(v), round(vec_l2sq_distance(v, '[0.0, 0.0, 0.0]'), 2) as d FROM t ORDER BY d, ts; -+-------------------------+--------------------------+-------+ -| ts | v | d | -+-------------------------+--------------------------+-------+ -| 1970-01-01T00:00:00.001 | 0000803f0000004000004040 | 14.0 | -| 1970-01-01T00:00:00.002 | 000080400000a0400000c040 | 77.0 | -| 1970-01-01T00:00:00.003 | 0000e0400000004100001041 | 194.0 | -+-------------------------+--------------------------+-------+ ++-------------------------+--------------------------+--------------------+-------+ +| ts | v | vec_to_string(t.v) | d | ++-------------------------+--------------------------+--------------------+-------+ +| 1970-01-01T00:00:00.001 | 0000803f0000004000004040 | [1,2,3] | 14.0 | +| 1970-01-01T00:00:00.004 | 0000803f0000004000004040 | [1,2,3] | 14.0 | +| 1970-01-01T00:00:00.002 | 000080400000a0400000c040 | [4,5,6] | 77.0 | +| 1970-01-01T00:00:00.005 | 000080400000a0400000c040 | [4,5,6] | 77.0 | +| 1970-01-01T00:00:00.003 | 0000e0400000004100001041 | [7,8,9] | 194.0 | +| 1970-01-01T00:00:00.006 | 0000e0400000004100001041 | [7,8,9] | 194.0 | ++-------------------------+--------------------------+--------------------+-------+ SELECT round(vec_l2sq_distance('[7.0, 8.0, 9.0]', v), 2) FROM t; @@ -119,17 +140,23 @@ SELECT round(vec_l2sq_distance('[7.0, 8.0, 9.0]', v), 2) FROM t; | 108.0 | | 27.0 | | 0.0 | +| 108.0 | +| 27.0 | +| 0.0 | +----------------------------------------------------------------+ -SELECT *, round(vec_l2sq_distance('[7.0, 8.0, 9.0]', v), 2) as d FROM t ORDER BY d; +SELECT ts, v, vec_to_string(v), round(vec_l2sq_distance('[7.0, 8.0, 9.0]', v), 2) as d FROM t ORDER BY d, ts; -+-------------------------+--------------------------+-------+ -| ts | v | d | -+-------------------------+--------------------------+-------+ -| 1970-01-01T00:00:00.003 | 0000e0400000004100001041 | 0.0 | -| 1970-01-01T00:00:00.002 | 000080400000a0400000c040 | 27.0 | -| 1970-01-01T00:00:00.001 | 0000803f0000004000004040 | 108.0 | -+-------------------------+--------------------------+-------+ ++-------------------------+--------------------------+--------------------+-------+ +| ts | v | vec_to_string(t.v) | d | ++-------------------------+--------------------------+--------------------+-------+ +| 1970-01-01T00:00:00.003 | 0000e0400000004100001041 | [7,8,9] | 0.0 | +| 1970-01-01T00:00:00.006 | 0000e0400000004100001041 | [7,8,9] | 0.0 | +| 1970-01-01T00:00:00.002 | 000080400000a0400000c040 | [4,5,6] | 27.0 | +| 1970-01-01T00:00:00.005 | 000080400000a0400000c040 | [4,5,6] | 27.0 | +| 1970-01-01T00:00:00.001 | 0000803f0000004000004040 | [1,2,3] | 108.0 | +| 1970-01-01T00:00:00.004 | 0000803f0000004000004040 | [1,2,3] | 108.0 | ++-------------------------+--------------------------+--------------------+-------+ SELECT round(vec_l2sq_distance(v, v), 2) FROM t; @@ -139,6 +166,9 @@ SELECT round(vec_l2sq_distance(v, v), 2) FROM t; | 0.0 | | 0.0 | | 0.0 | +| 0.0 | +| 0.0 | +| 0.0 | +--------------------------------------------+ -- Unexpected dimension -- @@ -159,17 +189,23 @@ SELECT round(vec_dot_product(v, '[0.0, 0.0, 0.0]'), 2) FROM t; | 0.0 | | 0.0 | | 0.0 | +| 0.0 | +| 0.0 | +| 0.0 | +--------------------------------------------------------------+ -SELECT *, round(vec_dot_product(v, '[0.0, 0.0, 0.0]'), 2) as d FROM t ORDER BY d; +SELECT ts, v, vec_to_string(v), round(vec_dot_product(v, '[0.0, 0.0, 0.0]'), 2) as d FROM t ORDER BY d, ts; -+-------------------------+--------------------------+-----+ -| ts | v | d | -+-------------------------+--------------------------+-----+ -| 1970-01-01T00:00:00.001 | 0000803f0000004000004040 | 0.0 | -| 1970-01-01T00:00:00.002 | 000080400000a0400000c040 | 0.0 | -| 1970-01-01T00:00:00.003 | 0000e0400000004100001041 | 0.0 | -+-------------------------+--------------------------+-----+ ++-------------------------+--------------------------+--------------------+-----+ +| ts | v | vec_to_string(t.v) | d | ++-------------------------+--------------------------+--------------------+-----+ +| 1970-01-01T00:00:00.001 | 0000803f0000004000004040 | [1,2,3] | 0.0 | +| 1970-01-01T00:00:00.002 | 000080400000a0400000c040 | [4,5,6] | 0.0 | +| 1970-01-01T00:00:00.003 | 0000e0400000004100001041 | [7,8,9] | 0.0 | +| 1970-01-01T00:00:00.004 | 0000803f0000004000004040 | [1,2,3] | 0.0 | +| 1970-01-01T00:00:00.005 | 000080400000a0400000c040 | [4,5,6] | 0.0 | +| 1970-01-01T00:00:00.006 | 0000e0400000004100001041 | [7,8,9] | 0.0 | ++-------------------------+--------------------------+--------------------+-----+ SELECT round(vec_dot_product('[7.0, 8.0, 9.0]', v), 2) FROM t; @@ -179,17 +215,23 @@ SELECT round(vec_dot_product('[7.0, 8.0, 9.0]', v), 2) FROM t; | 50.0 | | 122.0 | | 194.0 | +| 50.0 | +| 122.0 | +| 194.0 | +--------------------------------------------------------------+ -SELECT *, round(vec_dot_product('[7.0, 8.0, 9.0]', v), 2) as d FROM t ORDER BY d; +SELECT ts, v, vec_to_string(v), round(vec_dot_product('[7.0, 8.0, 9.0]', v), 2) as d FROM t ORDER BY d, ts; -+-------------------------+--------------------------+-------+ -| ts | v | d | -+-------------------------+--------------------------+-------+ -| 1970-01-01T00:00:00.001 | 0000803f0000004000004040 | 50.0 | -| 1970-01-01T00:00:00.002 | 000080400000a0400000c040 | 122.0 | -| 1970-01-01T00:00:00.003 | 0000e0400000004100001041 | 194.0 | -+-------------------------+--------------------------+-------+ ++-------------------------+--------------------------+--------------------+-------+ +| ts | v | vec_to_string(t.v) | d | ++-------------------------+--------------------------+--------------------+-------+ +| 1970-01-01T00:00:00.001 | 0000803f0000004000004040 | [1,2,3] | 50.0 | +| 1970-01-01T00:00:00.004 | 0000803f0000004000004040 | [1,2,3] | 50.0 | +| 1970-01-01T00:00:00.002 | 000080400000a0400000c040 | [4,5,6] | 122.0 | +| 1970-01-01T00:00:00.005 | 000080400000a0400000c040 | [4,5,6] | 122.0 | +| 1970-01-01T00:00:00.003 | 0000e0400000004100001041 | [7,8,9] | 194.0 | +| 1970-01-01T00:00:00.006 | 0000e0400000004100001041 | [7,8,9] | 194.0 | ++-------------------------+--------------------------+--------------------+-------+ SELECT round(vec_dot_product(v, v), 2) FROM t; @@ -199,6 +241,9 @@ SELECT round(vec_dot_product(v, v), 2) FROM t; | 14.0 | | 77.0 | | 194.0 | +| 14.0 | +| 77.0 | +| 194.0 | +------------------------------------------+ -- Unexpected dimension -- @@ -213,19 +258,19 @@ Error: 3001(EngineExecuteQuery), Invalid argument error: Encountered non UTF-8 d -- Unexpected dimension -- INSERT INTO t VALUES -(4, "[1.0]"); +(4, '[1.0]'); Error: 1004(InvalidArguments), Invalid Vector: Failed to parse [1.0] to Vector value: wrong dimension -- Invalid vector value -- INSERT INTO t VALUES -(5, "1.0,2.0,3.0"); +(5, '1.0,2.0,3.0'); Error: 1004(InvalidArguments), Invalid Vector: Failed to parse 1.0,2.0,3.0 to Vector value: not properly enclosed in brackets -- Invalid vector value -- INSERT INTO t VALUES -(6, "[30h, 40s, 50m]"); +(6, '[30h, 40s, 50m]'); Error: 1004(InvalidArguments), Invalid Vector: Failed to parse [30h, 40s, 50m] to Vector value: elements are not all float32 @@ -240,27 +285,15 @@ INSERT INTO t2 (ts) VALUES Affected Rows: 3 --- SQLNESS PROTOCOL MYSQL -SELECT * FROM t2; - -+----------------------------+---------+ -| ts | v | -+----------------------------+---------+ -| 1970-01-01 00:00:00.001000 | [1,2,3] | -| 1970-01-01 00:00:00.002000 | [1,2,3] | -| 1970-01-01 00:00:00.003000 | [1,2,3] | -+----------------------------+---------+ - --- SQLNESS PROTOCOL POSTGRES -SELECT * FROM t2; - -+----------------------------+-----------+ -| ts | v | -+----------------------------+-----------+ -| 1970-01-01 00:00:00.001000 | "[1,2,3]" | -| 1970-01-01 00:00:00.002000 | "[1,2,3]" | -| 1970-01-01 00:00:00.003000 | "[1,2,3]" | -+----------------------------+-----------+ +SELECT ts, v, vec_to_string(v) FROM t2; + ++-------------------------+--------------------------+---------------------+ +| ts | v | vec_to_string(t2.v) | ++-------------------------+--------------------------+---------------------+ +| 1970-01-01T00:00:00.001 | 0000803f0000004000004040 | [1,2,3] | +| 1970-01-01T00:00:00.002 | 0000803f0000004000004040 | [1,2,3] | +| 1970-01-01T00:00:00.003 | 0000803f0000004000004040 | [1,2,3] | ++-------------------------+--------------------------+---------------------+ DROP TABLE t; diff --git a/tests/cases/standalone/common/types/vector/vector.sql b/tests/cases/standalone/common/types/vector/vector.sql index ed98d898dbac..1f81d5cd56fe 100644 --- a/tests/cases/standalone/common/types/vector/vector.sql +++ b/tests/cases/standalone/common/types/vector/vector.sql @@ -1,23 +1,26 @@ CREATE TABLE t (ts TIMESTAMP TIME INDEX, v VECTOR(3)); +-- Invert string INSERT INTO t VALUES -(1, "[1.0, 2.0, 3.0]"), -(2, "[4.0, 5.0, 6.0]"), -(3, "[7.0, 8.0, 9.0]"); +(1, '[1.0, 2.0, 3.0]'), +(2, '[4.0, 5.0, 6.0]'), +(3, '[7.0, 8.0, 9.0]'); --- SQLNESS PROTOCOL MYSQL -SELECT * FROM t; +-- Invert vector value +INSERT INTO t VALUES +(4, parse_vec('[1.0, 2.0, 3.0]')), +(5, parse_vec('[4.0, 5.0, 6.0]')), +(6, parse_vec('[7.0, 8.0, 9.0]')); --- SQLNESS PROTOCOL POSTGRES -SELECT * FROM t; +SELECT ts, v, vec_to_string(v) FROM t; SELECT round(vec_cos_distance(v, '[0.0, 0.0, 0.0]'), 2) FROM t; -SELECT *, round(vec_cos_distance(v, '[0.0, 0.0, 0.0]'), 2) as d FROM t ORDER BY d; +SELECT ts, v, vec_to_string(v), round(vec_cos_distance(v, '[0.0, 0.0, 0.0]'), 2) as d FROM t ORDER BY d, ts; SELECT round(vec_cos_distance('[7.0, 8.0, 9.0]', v), 2) FROM t; -SELECT *, round(vec_cos_distance('[7.0, 8.0, 9.0]', v), 2) as d FROM t ORDER BY d; +SELECT ts, v, vec_to_string(v), round(vec_cos_distance('[7.0, 8.0, 9.0]', v), 2) as d FROM t ORDER BY d, ts; SELECT round(vec_cos_distance(v, v), 2) FROM t; @@ -29,11 +32,11 @@ SELECT vec_cos_distance(v, 1.0) FROM t; SELECT round(vec_l2sq_distance(v, '[0.0, 0.0, 0.0]'), 2) FROM t; -SELECT *, round(vec_l2sq_distance(v, '[0.0, 0.0, 0.0]'), 2) as d FROM t ORDER BY d; +SELECT ts, v, vec_to_string(v), round(vec_l2sq_distance(v, '[0.0, 0.0, 0.0]'), 2) as d FROM t ORDER BY d, ts; SELECT round(vec_l2sq_distance('[7.0, 8.0, 9.0]', v), 2) FROM t; -SELECT *, round(vec_l2sq_distance('[7.0, 8.0, 9.0]', v), 2) as d FROM t ORDER BY d; +SELECT ts, v, vec_to_string(v), round(vec_l2sq_distance('[7.0, 8.0, 9.0]', v), 2) as d FROM t ORDER BY d, ts; SELECT round(vec_l2sq_distance(v, v), 2) FROM t; @@ -46,11 +49,11 @@ SELECT vec_l2sq_distance(v, 1.0) FROM t; SELECT round(vec_dot_product(v, '[0.0, 0.0, 0.0]'), 2) FROM t; -SELECT *, round(vec_dot_product(v, '[0.0, 0.0, 0.0]'), 2) as d FROM t ORDER BY d; +SELECT ts, v, vec_to_string(v), round(vec_dot_product(v, '[0.0, 0.0, 0.0]'), 2) as d FROM t ORDER BY d, ts; SELECT round(vec_dot_product('[7.0, 8.0, 9.0]', v), 2) FROM t; -SELECT *, round(vec_dot_product('[7.0, 8.0, 9.0]', v), 2) as d FROM t ORDER BY d; +SELECT ts, v, vec_to_string(v), round(vec_dot_product('[7.0, 8.0, 9.0]', v), 2) as d FROM t ORDER BY d, ts; SELECT round(vec_dot_product(v, v), 2) FROM t; @@ -62,15 +65,15 @@ SELECT vec_dot_product(v, 1.0) FROM t; -- Unexpected dimension -- INSERT INTO t VALUES -(4, "[1.0]"); +(4, '[1.0]'); -- Invalid vector value -- INSERT INTO t VALUES -(5, "1.0,2.0,3.0"); +(5, '1.0,2.0,3.0'); -- Invalid vector value -- INSERT INTO t VALUES -(6, "[30h, 40s, 50m]"); +(6, '[30h, 40s, 50m]'); CREATE TABLE t2 (ts TIMESTAMP TIME INDEX, v VECTOR(3) DEFAULT '[1.0, 2.0, 3.0]'); @@ -79,11 +82,7 @@ INSERT INTO t2 (ts) VALUES (2), (3); --- SQLNESS PROTOCOL MYSQL -SELECT * FROM t2; - --- SQLNESS PROTOCOL POSTGRES -SELECT * FROM t2; +SELECT ts, v, vec_to_string(v) FROM t2; DROP TABLE t;