Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(sqlparser): support literal integers in hex / oct / bin #14262

Merged
merged 3 commits into from
Dec 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions src/common/src/types/decimal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,15 @@ impl Decimal {
let decimal = RustDecimal::from_scientific(value).ok()?;
Some(Normalized(decimal))
}

/// Parses `s` as a [`Decimal`] written in the given `radix`.
///
/// The input is ASCII-lowercased first. The special spellings `nan`,
/// `inf` / `infinity` (optionally prefixed with `+`) and `-inf` / `-infinity`
/// are recognised before any radix parsing takes place, so they are accepted
/// regardless of `radix`. Every other input is handed, in its lowercased form,
/// to `RustDecimal::from_str_radix` and wrapped in `Decimal::Normalized`.
///
/// # Errors
///
/// Returns whatever error `RustDecimal::from_str_radix` reports for the
/// lowercased input.
pub fn from_str_radix(s: &str, radix: u32) -> rust_decimal::Result<Self> {
    let lowered = s.to_ascii_lowercase();
    let text = lowered.as_str();

    // Special values take precedence over the numeric parse.
    if text == "nan" {
        return Ok(Decimal::NaN);
    }
    if matches!(text, "inf" | "+inf" | "infinity" | "+infinity") {
        return Ok(Decimal::PositiveInf);
    }
    if matches!(text, "-inf" | "-infinity") {
        return Ok(Decimal::NegativeInf);
    }

    RustDecimal::from_str_radix(text, radix).map(Decimal::Normalized)
}
}

impl ToBinary for Decimal {
Expand Down
5 changes: 5 additions & 0 deletions src/frontend/planner_test/tests/testdata/input/expr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,11 @@
SELECT null < null;
expected_outputs:
- logical_plan
- name: hex bitwise-or bin
sql: |
SELECT 0x25 | 0b110;
expected_outputs:
- logical_plan
- name: bind is distinct from
sql: |
SELECT 1 IS DISTINCT FROM 2
Expand Down
6 changes: 6 additions & 0 deletions src/frontend/planner_test/tests/testdata/output/expr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,12 @@
logical_plan: |-
LogicalProject { exprs: [(null:Varchar < null:Varchar) as $expr1] }
└─LogicalValues { rows: [[]], schema: Schema { fields: [] } }
- name: hex bitwise-or bin
sql: |
SELECT 0x25 | 0b110;
logical_plan: |-
LogicalProject { exprs: [(37:Int32 | 6:Int32) as $expr1] }
└─LogicalValues { rows: [[]], schema: Schema { fields: [] } }
- name: bind is distinct from
sql: |
SELECT 1 IS DISTINCT FROM 2
Expand Down
56 changes: 46 additions & 10 deletions src/frontend/src/binder/expr/value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,12 +52,30 @@ impl Binder {
Ok(Literal::new(Some(ScalarImpl::Bool(b)), DataType::Boolean))
}

fn bind_number(&mut self, s: String) -> Result<Literal> {
let (data, data_type) = if let Ok(int_32) = s.parse::<i32>() {
fn bind_number(&mut self, mut s: String) -> Result<Literal> {
let prefix_start = match s.starts_with('-') {
true => 1,
false => 0,
};
let base = match prefix_start + 2 <= s.len() {
true => match &s[prefix_start..prefix_start + 2] {
// tokenizer already converts them to lowercase
"0x" => 16,
"0o" => 8,
"0b" => 2,
_ => 10,
},
false => 10,
};
if base != 10 {
s.replace_range(prefix_start..prefix_start + 2, "");
}

let (data, data_type) = if let Ok(int_32) = i32::from_str_radix(&s, base) {
(Some(ScalarImpl::Int32(int_32)), DataType::Int32)
} else if let Ok(int_64) = s.parse::<i64>() {
} else if let Ok(int_64) = i64::from_str_radix(&s, base) {
(Some(ScalarImpl::Int64(int_64)), DataType::Int64)
} else if let Ok(decimal) = s.parse::<Decimal>() {
} else if let Ok(decimal) = Decimal::from_str_radix(&s, base) {
// Notice: when the length of decimal exceeds 29(>= 30), it will be rounded up.
(Some(ScalarImpl::Decimal(decimal)), DataType::Decimal)
} else if let Some(scientific) = Decimal::from_scientific(&s) {
Expand Down Expand Up @@ -207,15 +225,14 @@ mod tests {
use risingwave_expr::expr::build_from_prost;
use risingwave_sqlparser::ast::Value::Number;

use super::*;
use crate::binder::test_utils::mock_binder;
use crate::expr::{Expr, ExprImpl, ExprType, FunctionCall};

#[tokio::test]
async fn test_bind_value() {
use std::str::FromStr;

use super::*;

let mut binder = mock_binder();
let values = [
"1",
Expand Down Expand Up @@ -254,12 +271,33 @@ mod tests {
}
}

/// Checks that `bind_number` yields the expected scalar for hex (`0x`),
/// octal (`0o`) and binary (`0b`) literals, with and without a leading `-`,
/// and that values around the `i32` boundary land in `Int32` or `Int64`
/// as listed in the case table.
#[tokio::test]
async fn test_bind_radix() {
    // (literal text handed to the binder, scalar it must bind to)
    let cases = [
        ("0x42e3", ScalarImpl::Int32(0x42e3)),
        ("-0x40", ScalarImpl::Int32(-0x40)),
        ("0b1101", ScalarImpl::Int32(0b1101)),
        ("-0b101", ScalarImpl::Int32(-0b101)),
        ("0o664", ScalarImpl::Int32(0o664)),
        ("-0o755", ScalarImpl::Int32(-0o755)),
        ("2147483647", ScalarImpl::Int32(2147483647)),
        ("2147483648", ScalarImpl::Int64(2147483648)),
        ("-2147483648", ScalarImpl::Int32(-2147483648)),
        ("0x7fffffff", ScalarImpl::Int32(0x7fffffff)),
        ("0x80000000", ScalarImpl::Int64(0x80000000)),
        ("-0x80000000", ScalarImpl::Int32(-0x80000000)),
    ];

    let mut binder = mock_binder();
    for (input, expected) in cases {
        let literal = binder.bind_number(input.into()).unwrap();
        let bound = literal.get_data().as_ref().unwrap();
        assert_eq!(bound, &expected);
    }
}

#[tokio::test]
async fn test_bind_scientific_number() {
use std::str::FromStr;

use super::*;

let mut binder = mock_binder();
let values = [
("1e6"),
Expand Down Expand Up @@ -336,8 +374,6 @@ mod tests {

#[tokio::test]
async fn test_bind_interval() {
use super::*;

let mut binder = mock_binder();
let values = [
"1 hour",
Expand Down
39 changes: 28 additions & 11 deletions src/sqlparser/src/tokenizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -539,14 +539,6 @@ impl<'a> Tokenizer<'a> {
chars.next(); // consume the first char
let s = self.tokenize_word(ch, chars);

if s.chars().all(|x| x.is_ascii_digit() || x == '.') {
let mut s = peeking_take_while(&mut s.chars().peekable(), |ch| {
ch.is_ascii_digit() || ch == '.'
});
let s2 = peeking_take_while(chars, |ch| ch.is_ascii_digit() || ch == '.');
s += s2.as_str();
return Ok(Some(Token::Number(s)));
}
Comment on lines -542 to -549
Copy link
Contributor Author

@xiangjinwu xiangjinwu Dec 28, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This hack was here because a HiveQL identifier can start with a digit (0..=9); see
https://github.com/sqlparser-rs/sqlparser-rs/pull/235/files

Ok(Some(Token::make_word(&s, None)))
}
// string
Expand Down Expand Up @@ -574,10 +566,24 @@ impl<'a> Tokenizer<'a> {
let mut s = peeking_take_while(chars, |ch| ch.is_ascii_digit());

// match integer literal that starts with 0x / 0o / 0b (hex / octal / binary)
if s == "0" && chars.peek() == Some(&'x') {
if s == "0"
&& let Some(&radix) = chars.peek()
&& "xob".contains(radix.to_ascii_lowercase())
{
chars.next();
let s2 = peeking_take_while(chars, |ch| ch.is_ascii_hexdigit());
return Ok(Some(Token::HexStringLiteral(s2)));
let radix = radix.to_ascii_lowercase();
let base = match radix {
'x' => 16,
'o' => 8,
'b' => 2,
_ => unreachable!(),
};
let s2 = peeking_take_while(chars, |ch| ch.is_digit(base));
if s2.is_empty() {
return self.tokenizer_error("incomplete integer literal");
}
self.reject_number_junk(chars)?;
return Ok(Some(Token::Number(format!("0{radix}{s2}"))));
}

// match one period
Expand All @@ -603,11 +609,13 @@ impl<'a> Tokenizer<'a> {
chars.next();
}
s += &peeking_take_while(chars, |ch| ch.is_ascii_digit());
self.reject_number_junk(chars)?;
return Ok(Some(Token::Number(s)));
}
// Not a scientific number
_ => {}
};
self.reject_number_junk(chars)?;
Ok(Some(Token::Number(s)))
}
// punctuation
Expand Down Expand Up @@ -901,6 +909,15 @@ impl<'a> Tokenizer<'a> {
})
}

/// Fails tokenization when a numeric literal is immediately followed by a
/// character that `is_identifier_start` accepts.
///
/// Only peeks at `chars`; nothing is consumed.
///
/// # Errors
///
/// Returns the "trailing junk after numeric literal" tokenizer error when the
/// next character passes `is_identifier_start`.
fn reject_number_junk(&self, chars: &mut Peekable<Chars<'_>>) -> Result<(), TokenizerError> {
    match chars.peek() {
        Some(&next) if is_identifier_start(next) => {
            self.tokenizer_error("trailing junk after numeric literal")
        }
        // End of input, or a character that cannot begin an identifier.
        _ => Ok(()),
    }
}

// Consume characters until newline
fn tokenize_single_line_comment(&self, chars: &mut Peekable<Chars<'_>>) -> String {
let mut comment = peeking_take_while(chars, |ch| ch != '\n');
Expand Down
30 changes: 30 additions & 0 deletions src/sqlparser/tests/testdata/select.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,8 @@
- input: SELECT timestamp with time zone '2022-10-01 12:00:00Z' AT TIME ZONE 'US/Pacific'
formatted_sql: SELECT TIMESTAMP WITH TIME ZONE '2022-10-01 12:00:00Z' AT TIME ZONE 'US/Pacific'
formatted_ast: 'Query(Query { with: None, body: Select(Select { distinct: All, projection: [UnnamedExpr(AtTimeZone { timestamp: TypedString { data_type: Timestamp(true), value: "2022-10-01 12:00:00Z" }, time_zone: "US/Pacific" })], from: [], lateral_views: [], selection: None, group_by: [], having: None }), order_by: [], limit: None, offset: None, fetch: None })'
- input: SELECT 0c6
error_msg: 'sql parser error: trailing junk after numeric literal at Line: 1, Column 8'
- input: SELECT 1e6
formatted_sql: SELECT 1e6
formatted_ast: 'Query(Query { with: None, body: Select(Select { distinct: All, projection: [UnnamedExpr(Value(Number("1e6")))], from: [], lateral_views: [], selection: None, group_by: [], having: None }), order_by: [], limit: None, offset: None, fetch: None })'
Expand All @@ -89,6 +91,34 @@
- input: SELECT -1e6
formatted_sql: SELECT -1e6
formatted_ast: 'Query(Query { with: None, body: Select(Select { distinct: All, projection: [UnnamedExpr(Value(Number("-1e6")))], from: [], lateral_views: [], selection: None, group_by: [], having: None }), order_by: [], limit: None, offset: None, fetch: None })'
- input: SELECT 0x42e3
formatted_sql: SELECT 0x42e3
formatted_ast: 'Query(Query { with: None, body: Select(Select { distinct: All, projection: [UnnamedExpr(Value(Number("0x42e3")))], from: [], lateral_views: [], selection: None, group_by: [], having: None }), order_by: [], limit: None, offset: None, fetch: None })'
- input: SELECT -0X40
formatted_sql: SELECT -0x40
formatted_ast: 'Query(Query { with: None, body: Select(Select { distinct: All, projection: [UnnamedExpr(Value(Number("-0x40")))], from: [], lateral_views: [], selection: None, group_by: [], having: None }), order_by: [], limit: None, offset: None, fetch: None })'
- input: SELECT 0B1101
formatted_sql: SELECT 0b1101
formatted_ast: 'Query(Query { with: None, body: Select(Select { distinct: All, projection: [UnnamedExpr(Value(Number("0b1101")))], from: [], lateral_views: [], selection: None, group_by: [], having: None }), order_by: [], limit: None, offset: None, fetch: None })'
- input: SELECT -0b101
formatted_sql: SELECT -0b101
formatted_ast: 'Query(Query { with: None, body: Select(Select { distinct: All, projection: [UnnamedExpr(Value(Number("-0b101")))], from: [], lateral_views: [], selection: None, group_by: [], having: None }), order_by: [], limit: None, offset: None, fetch: None })'
- input: SELECT 0o664
formatted_sql: SELECT 0o664
formatted_ast: 'Query(Query { with: None, body: Select(Select { distinct: All, projection: [UnnamedExpr(Value(Number("0o664")))], from: [], lateral_views: [], selection: None, group_by: [], having: None }), order_by: [], limit: None, offset: None, fetch: None })'
- input: SELECT -0O755
formatted_sql: SELECT -0o755
formatted_ast: 'Query(Query { with: None, body: Select(Select { distinct: All, projection: [UnnamedExpr(Value(Number("-0o755")))], from: [], lateral_views: [], selection: None, group_by: [], having: None }), order_by: [], limit: None, offset: None, fetch: None })'
- input: SELECT 0o129
error_msg: |-
sql parser error: Expected end of statement, found: 9 at line:1, column:13
Near "SELECT 0o12"
- input: SELECT 0o3.5
error_msg: |-
sql parser error: Expected end of statement, found: .5 at line:1, column:13
Near "SELECT 0o3"
- input: SELECT 0x
error_msg: 'sql parser error: incomplete integer literal at Line: 1, Column 8'
- input: SELECT 1::float(0)
error_msg: 'sql parser error: precision for type float must be at least 1 bit'
- input: SELECT 1::float(54)
Expand Down
Loading