From 86a30840fee895991ee4d2acd629365327e0c1ed Mon Sep 17 00:00:00 2001
From: Richard Chien
Date: Thu, 27 Jun 2024 15:17:50 +0800
Subject: [PATCH] feat(parser): support `t`, `true`, etc. in csv parser (#17470)

Signed-off-by: Richard Chien
---
 src/connector/src/parser/csv_parser.rs | 74 ++++++++++++++++++++++++--
 1 file changed, 71 insertions(+), 3 deletions(-)

diff --git a/src/connector/src/parser/csv_parser.rs b/src/connector/src/parser/csv_parser.rs
index 25948c273d3e1..386ad392d96ba 100644
--- a/src/connector/src/parser/csv_parser.rs
+++ b/src/connector/src/parser/csv_parser.rs
@@ -12,7 +12,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-use risingwave_common::types::{Date, Decimal, Time, Timestamp, Timestamptz};
+use risingwave_common::cast::str_to_bool;
+use risingwave_common::types::{Date, Decimal, ScalarImpl, Time, Timestamp, Timestamptz};
 
 use super::unified::{AccessError, AccessResult};
 use super::{ByteStreamSourceParser, CsvProperties};
@@ -76,7 +77,15 @@ impl CsvParser {
     fn parse_string(dtype: &DataType, v: String) -> AccessResult {
         let v = match dtype {
             // mysql use tinyint to represent boolean
-            DataType::Boolean => (parse!(v, i16)? != 0).into(),
+            DataType::Boolean => {
+                str_to_bool(&v)
+                    .map(ScalarImpl::Bool)
+                    .map_err(|_| AccessError::TypeError {
+                        expected: "boolean".to_owned(),
+                        got: "string".to_owned(),
+                        value: v,
+                    })?
+            }
             DataType::Int16 => parse!(v, i16)?.into(),
             DataType::Int32 => parse!(v, i32)?.into(),
             DataType::Int64 => parse!(v, i64)?.into(),
@@ -168,7 +177,7 @@ impl ByteStreamSourceParser for CsvParser {
 mod tests {
     use risingwave_common::array::Op;
     use risingwave_common::row::Row;
-    use risingwave_common::types::{DataType, ScalarImpl, ToOwnedDatum};
+    use risingwave_common::types::{DataType, ToOwnedDatum};
 
     use super::*;
     use crate::parser::SourceStreamChunkBuilder;
@@ -377,4 +386,63 @@ mod tests {
             );
         }
     }
+
+    #[test]
+    fn test_parse_boolean() {
+        assert_eq!(
+            CsvParser::parse_string(&DataType::Boolean, "1".to_string()).unwrap(),
+            Some(true.into())
+        );
+        assert_eq!(
+            CsvParser::parse_string(&DataType::Boolean, "t".to_string()).unwrap(),
+            Some(true.into())
+        );
+        assert_eq!(
+            CsvParser::parse_string(&DataType::Boolean, "T".to_string()).unwrap(),
+            Some(true.into())
+        );
+        assert_eq!(
+            CsvParser::parse_string(&DataType::Boolean, "true".to_string()).unwrap(),
+            Some(true.into())
+        );
+        assert_eq!(
+            CsvParser::parse_string(&DataType::Boolean, "TRUE".to_string()).unwrap(),
+            Some(true.into())
+        );
+        assert_eq!(
+            CsvParser::parse_string(&DataType::Boolean, "True".to_string()).unwrap(),
+            Some(true.into())
+        );
+
+        assert_eq!(
+            CsvParser::parse_string(&DataType::Boolean, "0".to_string()).unwrap(),
+            Some(false.into())
+        );
+        assert_eq!(
+            CsvParser::parse_string(&DataType::Boolean, "f".to_string()).unwrap(),
+            Some(false.into())
+        );
+        assert_eq!(
+            CsvParser::parse_string(&DataType::Boolean, "F".to_string()).unwrap(),
+            Some(false.into())
+        );
+        assert_eq!(
+            CsvParser::parse_string(&DataType::Boolean, "false".to_string()).unwrap(),
+            Some(false.into())
+        );
+        assert_eq!(
+            CsvParser::parse_string(&DataType::Boolean, "FALSE".to_string()).unwrap(),
+            Some(false.into())
+        );
+        assert_eq!(
+            CsvParser::parse_string(&DataType::Boolean, "False".to_string()).unwrap(),
+            Some(false.into())
+        );
+
+        assert!(CsvParser::parse_string(&DataType::Boolean, "2".to_string()).is_err());
+        assert!(CsvParser::parse_string(&DataType::Boolean, "t1".to_string()).is_err());
+        assert!(CsvParser::parse_string(&DataType::Boolean, "f1".to_string()).is_err());
+        assert!(CsvParser::parse_string(&DataType::Boolean, "false1".to_string()).is_err());
+        assert!(CsvParser::parse_string(&DataType::Boolean, "TRUE1".to_string()).is_err());
+    }
 }