Skip to content

Commit

Permalink
fix parsing array literal
Browse files Browse the repository at this point in the history
Signed-off-by: Runji Wang <[email protected]>
  • Loading branch information
wangrunji0408 committed Oct 25, 2023
1 parent 7a44630 commit b02635c
Showing 1 changed file with 31 additions and 18 deletions.
49 changes: 31 additions & 18 deletions src/expr/impl/src/scalar/cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -177,15 +177,16 @@ pub fn str_to_bytea(elem: &str) -> Result<Box<[u8]>> {

// TODO(nanderstabel): optimize for multidimensional List. Depth can be given as a parameter to this
// function.
// FIXME(runji): handle escape characters in double quoted strings.
/// Takes a string input in the form of a comma-separated list enclosed in braces, and returns a
/// vector of strings containing the list items.
///
/// # Examples
/// - "{1, 2, 3}" => ["1", "2", "3"]
/// - "{1, {2, 3}}" => ["1", "{2, 3}"]
/// - "{"1,2"}" => ["1,2"]
/// - "{null, "null"}" => [None, "null"]
fn unnest(input: &str) -> Result<Vec<Option<&str>>> {
/// - "{null, NuLL, "null"}" => [None, None, "null"]
fn parse_array_literal(input: &str) -> Result<Vec<Option<&str>>> {
let trimmed = input.trim();
if !trimmed.starts_with('{') || !trimmed.ends_with('}') {
return Err(ExprError::Parse("Input must be braced".into()));
Expand Down Expand Up @@ -215,7 +216,8 @@ fn unnest(input: &str) -> Result<Vec<Option<&str>>> {
},
',' if depth == 0 => {
items.push(match trimmed[start..i].trim() {
"null" => None,
"" => return Err(ExprError::Parse("Unexpected \"}\" character.".into())),
s if s.eq_ignore_ascii_case("null") => None,
s if s.starts_with('"') && s.ends_with('"') => Some(&s[1..s.len() - 1]),
s => Some(s),
});
Expand All @@ -228,8 +230,8 @@ fn unnest(input: &str) -> Result<Vec<Option<&str>>> {
return Err(eof());
}
match trimmed[start..].trim() {
"" => {}
"null" => items.push(None),
"" => return Err(ExprError::Parse("Unexpected \"}\" character.".into())),
s if s.eq_ignore_ascii_case("null") => items.push(None),
s if s.starts_with('"') && s.ends_with('"') => items.push(Some(&s[1..s.len() - 1])),
s => items.push(Some(s)),
}
Expand All @@ -245,7 +247,7 @@ fn str_to_list(input: &str, ctx: &Context) -> Result<ListValue> {
)
.unwrap();
let mut values = vec![];
for item in unnest(input)? {
for item in parse_array_literal(input)? {
match item {
Some(item) => {
let v = cast
Expand Down Expand Up @@ -365,35 +367,46 @@ mod tests {
}

#[test]
fn test_unnest() {
assert_eq!(unnest("{ }").unwrap(), vec![]);
fn test_parse_array_literal() {
assert_eq!(parse_array_literal("{ }").unwrap(), vec![]);
assert_eq!(parse_array_literal("{1 2}").unwrap(), vec![Some("1 2")]);
assert_eq!(
unnest("{1, 2, 3}").unwrap(),
parse_array_literal("{1, 2, 3}").unwrap(),
vec![Some("1"), Some("2"), Some("3")]
);
assert_eq!(unnest(r#"{"1,2"}"#).unwrap(), vec![Some("1,2")]);
assert_eq!(unnest(r#"{1, ""}"#).unwrap(), vec![Some("1"), Some("")]);
assert_eq!(unnest(r#"{"\""}"#).unwrap(), vec![Some("1"), Some("\"")]);
assert_eq!(
unnest(r#"{"null", null}"#).unwrap(),
vec![Some("null"), None]
parse_array_literal(r#"{"1,2"}"#).unwrap(),
vec![Some("1,2")]
);
assert_eq!(
unnest("{{1, 2, 3}, {4, 5, 6}}").unwrap(),
parse_array_literal(r#"{1, ""}"#).unwrap(),
vec![Some("1"), Some("")]
);
// FIXME: handle escape characters in double quoted strings.
// assert_eq!(parse_array_literal(r#"{"\""}"#).unwrap(), vec![Some("\"")]);
assert_eq!(
parse_array_literal(r#"{"null", "NULL", null, NuLL}"#).unwrap(),
vec![Some("null"), Some("NULL"), None, None]
);
assert_eq!(
parse_array_literal("{{1, 2, 3}, {4, 5, 6}}").unwrap(),
vec![Some("{1, 2, 3}"), Some("{4, 5, 6}")]
);
assert_eq!(
unnest("{{{1, 2, 3}}, {{4, 5, 6}}}").unwrap(),
parse_array_literal("{{{1, 2, 3}}, {{4, 5, 6}}}").unwrap(),
vec![Some("{{1, 2, 3}}"), Some("{{4, 5, 6}}")]
);
assert_eq!(
unnest("{{{1, 2, 3}, {4, 5, 6}}}").unwrap(),
parse_array_literal("{{{1, 2, 3}, {4, 5, 6}}}").unwrap(),
vec![Some("{{1, 2, 3}, {4, 5, 6}}")]
);
assert_eq!(
unnest("{{{aa, bb, cc}, {dd, ee, ff}}}").unwrap(),
parse_array_literal("{{{aa, bb, cc}, {dd, ee, ff}}}").unwrap(),
vec![Some("{{aa, bb, cc}, {dd, ee, ff}}")]
);
// Unexpected "," character.
assert!(parse_array_literal("{1,}").is_err());
assert!(parse_array_literal("{1,,3}").is_err());
}

#[test]
Expand Down

0 comments on commit b02635c

Please sign in to comment.