From b02635c220b1fd8ee684848914b869deec0a2b4c Mon Sep 17 00:00:00 2001
From: Runji Wang
Date: Wed, 25 Oct 2023 12:30:29 +0800
Subject: [PATCH] fix parsing array literal

Signed-off-by: Runji Wang
---
 src/expr/impl/src/scalar/cast.rs | 49 ++++++++++++++++++++------------
 1 file changed, 31 insertions(+), 18 deletions(-)

diff --git a/src/expr/impl/src/scalar/cast.rs b/src/expr/impl/src/scalar/cast.rs
index 707c943520066..02ac5a6f4be9a 100644
--- a/src/expr/impl/src/scalar/cast.rs
+++ b/src/expr/impl/src/scalar/cast.rs
@@ -177,6 +177,7 @@ pub fn str_to_bytea(elem: &str) -> Result> {
 
 // TODO(nanderstabel): optimize for multidimensional List. Depth can be given as a parameter to this
 // function.
+// FIXME(runji): handle escape characters in double quoted strings.
 /// Takes a string input in the form of a comma-separated list enclosed in braces, and returns a
 /// vector of strings containing the list items.
 ///
@@ -184,8 +185,8 @@ pub fn str_to_bytea(elem: &str) -> Result> {
 /// - "{1, 2, 3}" => ["1", "2", "3"]
 /// - "{1, {2, 3}}" => ["1", "{2, 3}"]
 /// - "{"1,2"}" => ["1,2"]
-/// - "{null, "null"}" => [None, "null"]
-fn unnest(input: &str) -> Result>> {
+/// - "{null, NuLL, "null"}" => [None, None, "null"]
+fn parse_array_literal(input: &str) -> Result>> {
     let trimmed = input.trim();
     if !trimmed.starts_with('{') || !trimmed.ends_with('}') {
         return Err(ExprError::Parse("Input must be braced".into()));
@@ -215,7 +216,8 @@ fn unnest(input: &str) -> Result>> {
             },
             ',' if depth == 0 => {
                 items.push(match trimmed[start..i].trim() {
-                    "null" => None,
+                    "" => return Err(ExprError::Parse("Unexpected \"}\" character.".into())),
+                    s if s.eq_ignore_ascii_case("null") => None,
                     s if s.starts_with('"') && s.ends_with('"') => Some(&s[1..s.len() - 1]),
                     s => Some(s),
                 });
@@ -228,8 +230,8 @@ fn unnest(input: &str) -> Result>> {
         return Err(eof());
     }
     match trimmed[start..].trim() {
-        "" => {}
-        "null" => items.push(None),
+        "" => return Err(ExprError::Parse("Unexpected \"}\" character.".into())),
+        s if s.eq_ignore_ascii_case("null") => items.push(None),
         s if s.starts_with('"') && s.ends_with('"') => items.push(Some(&s[1..s.len() - 1])),
         s => items.push(Some(s)),
     }
@@ -245,7 +247,7 @@ fn str_to_list(input: &str, ctx: &Context) -> Result {
     )
     .unwrap();
     let mut values = vec![];
-    for item in unnest(input)? {
+    for item in parse_array_literal(input)? {
         match item {
             Some(item) => {
                 let v = cast
@@ -365,35 +367,46 @@ mod tests {
     }
 
     #[test]
-    fn test_unnest() {
-        assert_eq!(unnest("{ }").unwrap(), vec![]);
+    fn test_parse_array_literal() {
+        assert_eq!(parse_array_literal("{ }").unwrap(), vec![]);
+        assert_eq!(parse_array_literal("{1 2}").unwrap(), vec![Some("1 2")]);
         assert_eq!(
-            unnest("{1, 2, 3}").unwrap(),
+            parse_array_literal("{1, 2, 3}").unwrap(),
             vec![Some("1"), Some("2"), Some("3")]
         );
-        assert_eq!(unnest(r#"{"1,2"}"#).unwrap(), vec![Some("1,2")]);
-        assert_eq!(unnest(r#"{1, ""}"#).unwrap(), vec![Some("1"), Some("")]);
-        assert_eq!(unnest(r#"{"\""}"#).unwrap(), vec![Some("1"), Some("\"")]);
         assert_eq!(
-            unnest(r#"{"null", null}"#).unwrap(),
-            vec![Some("null"), None]
+            parse_array_literal(r#"{"1,2"}"#).unwrap(),
+            vec![Some("1,2")]
         );
         assert_eq!(
-            unnest("{{1, 2, 3}, {4, 5, 6}}").unwrap(),
+            parse_array_literal(r#"{1, ""}"#).unwrap(),
+            vec![Some("1"), Some("")]
+        );
+        // FIXME: handle escape characters in double quoted strings.
+        // assert_eq!(parse_array_literal(r#"{"\""}"#).unwrap(), vec![Some("\"")]);
+        assert_eq!(
+            parse_array_literal(r#"{"null", "NULL", null, NuLL}"#).unwrap(),
+            vec![Some("null"), Some("NULL"), None, None]
+        );
+        assert_eq!(
+            parse_array_literal("{{1, 2, 3}, {4, 5, 6}}").unwrap(),
             vec![Some("{1, 2, 3}"), Some("{4, 5, 6}")]
         );
         assert_eq!(
-            unnest("{{{1, 2, 3}}, {{4, 5, 6}}}").unwrap(),
+            parse_array_literal("{{{1, 2, 3}}, {{4, 5, 6}}}").unwrap(),
             vec![Some("{{1, 2, 3}}"), Some("{{4, 5, 6}}")]
         );
         assert_eq!(
-            unnest("{{{1, 2, 3}, {4, 5, 6}}}").unwrap(),
+            parse_array_literal("{{{1, 2, 3}, {4, 5, 6}}}").unwrap(),
             vec![Some("{{1, 2, 3}, {4, 5, 6}}")]
         );
         assert_eq!(
-            unnest("{{{aa, bb, cc}, {dd, ee, ff}}}").unwrap(),
+            parse_array_literal("{{{aa, bb, cc}, {dd, ee, ff}}}").unwrap(),
             vec![Some("{{aa, bb, cc}, {dd, ee, ff}}")]
         );
+        // Unexpected "," character.
+        assert!(parse_array_literal("{1,}").is_err());
+        assert!(parse_array_literal("{1,,3}").is_err());
     }
 
     #[test]