From 75a8f547eb30af33062d9a41d89aec60c93f5bac Mon Sep 17 00:00:00 2001
From: Runji Wang
Date: Thu, 30 May 2024 22:18:59 +0800
Subject: [PATCH] implement `pg_get_keywords`

Signed-off-by: Runji Wang
---
 Cargo.lock                                |  1 +
 proto/expr.proto                          |  1 +
 src/expr/impl/Cargo.toml                  |  1 +
 src/expr/impl/src/table_function/mod.rs   |  1 +
 .../src/table_function/pg_get_keywords.rs | 58 +++++++++++++++++++
 src/sqlparser/src/keywords.rs             |  4 +-
 6 files changed, 64 insertions(+), 2 deletions(-)
 create mode 100644 src/expr/impl/src/table_function/pg_get_keywords.rs

diff --git a/Cargo.lock b/Cargo.lock
index ad387bcc1142e..b34a432d55762 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -10834,6 +10834,7 @@ dependencies = [
  "risingwave_common_estimate_size",
  "risingwave_expr",
  "risingwave_pb",
+ "risingwave_sqlparser",
  "rust_decimal",
  "self_cell",
  "serde",
diff --git a/proto/expr.proto b/proto/expr.proto
index f859cc8961b20..998ed63a4b084 100644
--- a/proto/expr.proto
+++ b/proto/expr.proto
@@ -330,6 +330,7 @@ message TableFunction {
     GENERATE_SUBSCRIPTS = 5;
     // buf:lint:ignore ENUM_VALUE_UPPER_SNAKE_CASE
     _PG_EXPANDARRAY = 6;
+    PG_GET_KEYWORDS = 18;
     // Jsonb functions
     JSONB_ARRAY_ELEMENTS = 10;
     JSONB_ARRAY_ELEMENTS_TEXT = 11;
diff --git a/src/expr/impl/Cargo.toml b/src/expr/impl/Cargo.toml
index 68b9ecfc55d13..fddb0087aaecb 100644
--- a/src/expr/impl/Cargo.toml
+++ b/src/expr/impl/Cargo.toml
@@ -57,6 +57,7 @@ regex = "1"
 risingwave_common = { workspace = true }
 risingwave_common_estimate_size = { workspace = true }
 risingwave_expr = { workspace = true }
+risingwave_sqlparser = { workspace = true }
 risingwave_pb = { workspace = true }
 rust_decimal = { version = "1", features = ["db-postgres", "maths"] }
 self_cell = "1.0.1"
diff --git a/src/expr/impl/src/table_function/mod.rs b/src/expr/impl/src/table_function/mod.rs
index 4a52ec5bd1f41..dfafb6721e866 100644
--- a/src/expr/impl/src/table_function/mod.rs
+++ b/src/expr/impl/src/table_function/mod.rs
@@ -16,5 +16,6 @@ mod generate_series;
 mod generate_subscripts;
 mod jsonb;
 mod pg_expandarray;
+mod pg_get_keywords;
 mod regexp_matches;
 mod unnest;
diff --git a/src/expr/impl/src/table_function/pg_get_keywords.rs b/src/expr/impl/src/table_function/pg_get_keywords.rs
new file mode 100644
index 0000000000000..bd18660872720
--- /dev/null
+++ b/src/expr/impl/src/table_function/pg_get_keywords.rs
@@ -0,0 +1,58 @@
+// Copyright 2024 RisingWave Labs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use risingwave_expr::function;
+use risingwave_sqlparser::keywords::{
+    ALL_KEYWORDS_INDEX, RESERVED_FOR_COLUMN_ALIAS, RESERVED_FOR_COLUMN_OR_TABLE_NAME,
+};
+
+/// Returns a set of records describing the SQL keywords recognized by the server.
+///
+/// The word column contains the keyword.
+///
+/// The catcode column contains a category code:
+/// - U for an unreserved keyword
+/// - C for a keyword that can be a column name
+/// - T for a keyword that can be a type or function name
+/// - R for a fully reserved keyword.
+///
+/// The catdesc column contains a possibly-localized string describing the keyword's category.
+///
+/// ```slt
+/// query TTT
+/// select * from pg_get_keywords() where word = 'add';
+/// ----
+/// add U unreserved
+/// ```
+#[function("pg_get_keywords() -> setof struct<word varchar, catcode varchar, catdesc varchar>")]
+fn pg_get_keywords() -> impl Iterator<Item = (Box<str>, &'static str, &'static str)> {
+    ALL_KEYWORDS_INDEX.iter().map(|keyword| {
+        // FIXME: The current category is not correct. Many are different from the PostgreSQL.
+        let catcode = if !RESERVED_FOR_COLUMN_OR_TABLE_NAME.contains(keyword) {
+            "U"
+        } else if !RESERVED_FOR_COLUMN_ALIAS.contains(keyword) {
+            "C"
+        } else {
+            "R"
+        };
+        let catdesc = match catcode {
+            "U" => "unreserved",
+            "C" => "unreserved (cannot be function or type name)",
+            "T" => "reserved (can be function or type name)",
+            "R" => "reserved",
+            _ => unreachable!(),
+        };
+        (keyword.to_string().to_lowercase().into(), catcode, catdesc)
+    })
+}
diff --git a/src/sqlparser/src/keywords.rs b/src/sqlparser/src/keywords.rs
index 0ed69adab3ad0..b61041d79c335 100644
--- a/src/sqlparser/src/keywords.rs
+++ b/src/sqlparser/src/keywords.rs
@@ -59,7 +59,7 @@ macro_rules! define_keywords {
         ];
 
         $(kw_def!($ident $(= $string_keyword)?);)*
-        pub const ALL_KEYWORDS: &[&str] = &[
+        pub const ALL_KEYWORDS: &[&'static str] = &[
             $($ident),*
         ];
     };
@@ -651,7 +651,7 @@ pub const RESERVED_FOR_COLUMN_ALIAS: &[Keyword] = &[
 /// Can't be used as a column or table name in PostgreSQL.
 ///
 /// This list is taken from the following table, for all "reserved" words in the PostgreSQL column,
-/// includinhg "can be function or type" and "requires AS".
+/// including "can be function or type" and "requires AS".
 ///
 /// `SELECT` and `WITH` were commented out because the following won't parse:
 /// `SELECT (SELECT 1)` or `SELECT (WITH a AS (SELECT 1) SELECT 1)`