diff --git a/Cargo.lock b/Cargo.lock index ad387bcc1142..b34a432d5576 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -10834,6 +10834,7 @@ dependencies = [ "risingwave_common_estimate_size", "risingwave_expr", "risingwave_pb", + "risingwave_sqlparser", "rust_decimal", "self_cell", "serde", diff --git a/proto/expr.proto b/proto/expr.proto index f859cc8961b2..998ed63a4b08 100644 --- a/proto/expr.proto +++ b/proto/expr.proto @@ -330,6 +330,7 @@ message TableFunction { GENERATE_SUBSCRIPTS = 5; // buf:lint:ignore ENUM_VALUE_UPPER_SNAKE_CASE _PG_EXPANDARRAY = 6; + PG_GET_KEYWORDS = 18; // Jsonb functions JSONB_ARRAY_ELEMENTS = 10; JSONB_ARRAY_ELEMENTS_TEXT = 11; diff --git a/src/expr/core/src/codegen.rs b/src/expr/core/src/codegen.rs index c9022e40aec9..e944f43950fd 100644 --- a/src/expr/core/src/codegen.rs +++ b/src/expr/core/src/codegen.rs @@ -12,8 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +//! This module contains imports that are used in the generated code for the `#[function]` macro. 
+ pub use async_trait::async_trait; pub use futures_async_stream::try_stream; pub use futures_util::stream::BoxStream; -pub use itertools::multizip; pub use linkme; diff --git a/src/expr/impl/Cargo.toml b/src/expr/impl/Cargo.toml index 68b9ecfc55d1..292e631345fa 100644 --- a/src/expr/impl/Cargo.toml +++ b/src/expr/impl/Cargo.toml @@ -58,6 +58,7 @@ risingwave_common = { workspace = true } risingwave_common_estimate_size = { workspace = true } risingwave_expr = { workspace = true } risingwave_pb = { workspace = true } +risingwave_sqlparser = { workspace = true } rust_decimal = { version = "1", features = ["db-postgres", "maths"] } self_cell = "1.0.1" serde = { version = "1", features = ["derive"] } diff --git a/src/expr/impl/src/table_function/mod.rs b/src/expr/impl/src/table_function/mod.rs index 4a52ec5bd1f4..dfafb6721e86 100644 --- a/src/expr/impl/src/table_function/mod.rs +++ b/src/expr/impl/src/table_function/mod.rs @@ -16,5 +16,6 @@ mod generate_series; mod generate_subscripts; mod jsonb; mod pg_expandarray; +mod pg_get_keywords; mod regexp_matches; mod unnest; diff --git a/src/expr/impl/src/table_function/pg_get_keywords.rs b/src/expr/impl/src/table_function/pg_get_keywords.rs new file mode 100644 index 000000000000..bd1866087272 --- /dev/null +++ b/src/expr/impl/src/table_function/pg_get_keywords.rs @@ -0,0 +1,58 @@ +// Copyright 2024 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use risingwave_expr::function; +use risingwave_sqlparser::keywords::{ + ALL_KEYWORDS_INDEX, RESERVED_FOR_COLUMN_ALIAS, RESERVED_FOR_COLUMN_OR_TABLE_NAME, +}; + +/// Returns a set of records describing the SQL keywords recognized by the server. +/// +/// The word column contains the keyword. +/// +/// The catcode column contains a category code: +/// - U for an unreserved keyword +/// - C for a keyword that can be a column name +/// - T for a keyword that can be a type or function name +/// - R for a fully reserved keyword. +/// +/// The catdesc column contains a possibly-localized string describing the keyword's category. +/// +/// ```slt +/// query TTT +/// select * from pg_get_keywords() where word = 'add'; +/// ---- +/// add U unreserved +/// ``` +#[function("pg_get_keywords() -> setof struct")] +fn pg_get_keywords() -> impl Iterator, &'static str, &'static str)> { + ALL_KEYWORDS_INDEX.iter().map(|keyword| { + // FIXME: The current category is not correct. Many are different from the PostgreSQL. + let catcode = if !RESERVED_FOR_COLUMN_OR_TABLE_NAME.contains(keyword) { + "U" + } else if !RESERVED_FOR_COLUMN_ALIAS.contains(keyword) { + "C" + } else { + "R" + }; + let catdesc = match catcode { + "U" => "unreserved", + "C" => "unreserved (cannot be function or type name)", + "T" => "reserved (can be function or type name)", + "R" => "reserved", + _ => unreachable!(), + }; + (keyword.to_string().to_lowercase().into(), catcode, catdesc) + }) +} diff --git a/src/expr/macro/src/gen.rs b/src/expr/macro/src/gen.rs index 1185df8a194b..7174b3adf29e 100644 --- a/src/expr/macro/src/gen.rs +++ b/src/expr/macro/src/gen.rs @@ -484,10 +484,6 @@ impl FunctionAttr { } } else { // no optimization - let array_zip = match children_indices.len() { - 0 => quote! { std::iter::repeat(()).take(input.capacity()) }, - _ => quote! { multizip((#(#arrays.iter(),)*)) }, - }; let let_variadic = variadic.then(|| { quote! 
{ let variadic_row = variadic_input.row_at_unchecked_vis(i); @@ -497,18 +493,18 @@ impl FunctionAttr { let mut builder = #builder_type::with_type(input.capacity(), self.context.return_type.clone()); if input.is_compacted() { - for (i, (#(#inputs,)*)) in #array_zip.enumerate() { + for i in 0..input.capacity() { + #(let #inputs = unsafe { #arrays.value_at_unchecked(i) };)* #let_variadic #append_output } } else { - // allow using `zip` for performance - #[allow(clippy::disallowed_methods)] - for (i, ((#(#inputs,)*), visible)) in #array_zip.zip(input.visibility().iter()).enumerate() { - if !visible { + for i in 0..input.capacity() { + if unsafe { !input.visibility().is_set_unchecked(i) } { builder.append_null(); continue; } + #(let #inputs = unsafe { #arrays.value_at_unchecked(i) };)* #let_variadic #append_output } @@ -1179,10 +1175,11 @@ impl FunctionAttr { let mut index_builder = I32ArrayBuilder::new(self.chunk_size); #(let mut #builders = #builder_types::with_type(self.chunk_size, #return_types);)* - for (i, ((#(#inputs,)*), visible)) in multizip((#(#arrays.iter(),)*)).zip_eq_fast(input.visibility().iter()).enumerate() { - if !visible { + for i in 0..input.capacity() { + if unsafe { !input.visibility().is_set_unchecked(i) } { continue; } + #(let #inputs = unsafe { #arrays.value_at_unchecked(i) };)* for output in #iter { index_builder.append(Some(i as i32)); match #output { diff --git a/src/frontend/src/binder/relation/table_function.rs b/src/frontend/src/binder/relation/table_function.rs index efb4de3c5551..1a609f87670f 100644 --- a/src/frontend/src/binder/relation/table_function.rs +++ b/src/frontend/src/binder/relation/table_function.rs @@ -16,18 +16,13 @@ use std::str::FromStr; use itertools::Itertools; use risingwave_common::bail_not_implemented; -use risingwave_common::catalog::{ - Field, Schema, PG_CATALOG_SCHEMA_NAME, RW_INTERNAL_TABLE_FUNCTION_NAME, -}; +use risingwave_common::catalog::{Field, Schema, RW_INTERNAL_TABLE_FUNCTION_NAME}; use 
risingwave_common::types::DataType; use risingwave_sqlparser::ast::{Function, FunctionArg, ObjectName, TableAlias}; use super::watermark::is_watermark_func; use super::{Binder, Relation, Result, WindowTableFunctionKind}; use crate::binder::bind_context::Clause; -use crate::catalog::system_catalog::pg_catalog::{ - PG_GET_KEYWORDS_FUNC_NAME, PG_KEYWORDS_TABLE_NAME, -}; use crate::error::ErrorCode; use crate::expr::{Expr, ExprImpl}; @@ -57,24 +52,6 @@ impl Binder { } return self.bind_internal_table(args, alias); } - if func_name.eq_ignore_ascii_case(PG_GET_KEYWORDS_FUNC_NAME) - || name.real_value().eq_ignore_ascii_case( - format!("{}.{}", PG_CATALOG_SCHEMA_NAME, PG_GET_KEYWORDS_FUNC_NAME).as_str(), - ) - { - if with_ordinality { - bail_not_implemented!( - "WITH ORDINALITY for internal/system table function {}", - func_name - ); - } - return self.bind_relation_by_name_inner( - Some(PG_CATALOG_SCHEMA_NAME), - PG_KEYWORDS_TABLE_NAME, - alias, - None, - ); - } } // window table functions (tumble/hop) if let Ok(kind) = WindowTableFunctionKind::from_str(func_name) { diff --git a/src/frontend/src/catalog/system_catalog/pg_catalog/mod.rs b/src/frontend/src/catalog/system_catalog/pg_catalog/mod.rs index 202646b55a60..de1fe4924642 100644 --- a/src/frontend/src/catalog/system_catalog/pg_catalog/mod.rs +++ b/src/frontend/src/catalog/system_catalog/pg_catalog/mod.rs @@ -52,5 +52,3 @@ mod pg_trigger; mod pg_type; mod pg_user; mod pg_views; - -pub use pg_keywords::*; diff --git a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_keywords.rs b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_keywords.rs index c3bb43ef0a84..a859527afa6d 100644 --- a/src/frontend/src/catalog/system_catalog/pg_catalog/pg_keywords.rs +++ b/src/frontend/src/catalog/system_catalog/pg_catalog/pg_keywords.rs @@ -12,19 +12,60 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+// The code is the same as `expr/impl/src/table_function/pg_get_keywords.rs`. + use risingwave_common::types::Fields; use risingwave_frontend_macro::system_catalog; +use risingwave_sqlparser::keywords::{ + ALL_KEYWORDS_INDEX, RESERVED_FOR_COLUMN_ALIAS, RESERVED_FOR_COLUMN_OR_TABLE_NAME, +}; -pub const PG_KEYWORDS_TABLE_NAME: &str = "pg_keywords"; -pub const PG_GET_KEYWORDS_FUNC_NAME: &str = "pg_get_keywords"; +use crate::catalog::system_catalog::SysCatalogReaderImpl; /// The catalog `pg_keywords` stores keywords. `pg_get_keywords` returns the content of this table. /// Ref: [`https://www.postgresql.org/docs/15/functions-info.html`] -// TODO: change to read reserved keywords here -#[system_catalog(view, "pg_catalog.pg_keywords")] +/// +/// # Example +/// +/// ```slt +/// query TTT +/// select * from pg_keywords where word = 'add'; +/// ---- +/// add U unreserved +/// ``` #[derive(Fields)] struct PgKeywords { + #[primary_key] word: String, - catcode: String, - catdesc: String, + catcode: char, + catdesc: &'static str, +} + +#[system_catalog(table, "pg_catalog.pg_keywords")] +fn read_pg_keywords(_reader: &SysCatalogReaderImpl) -> Vec { + ALL_KEYWORDS_INDEX + .iter() + .map(|keyword| { + // FIXME: The current category is not correct. Many are different from the PostgreSQL. + let catcode = if !RESERVED_FOR_COLUMN_OR_TABLE_NAME.contains(keyword) { + 'U' + } else if !RESERVED_FOR_COLUMN_ALIAS.contains(keyword) { + 'C' + } else { + 'R' + }; + let catdesc = match catcode { + 'U' => "unreserved", + 'C' => "unreserved (cannot be function or type name)", + 'T' => "reserved (can be function or type name)", + 'R' => "reserved", + _ => unreachable!(), + }; + PgKeywords { + word: keyword.to_string().to_lowercase(), + catcode, + catdesc, + } + }) + .collect() } diff --git a/src/sqlparser/src/keywords.rs b/src/sqlparser/src/keywords.rs index 0ed69adab3ad..b61041d79c33 100644 --- a/src/sqlparser/src/keywords.rs +++ b/src/sqlparser/src/keywords.rs @@ -59,7 +59,7 @@ macro_rules! 
define_keywords { ]; $(kw_def!($ident $(= $string_keyword)?);)* - pub const ALL_KEYWORDS: &[&str] = &[ + pub const ALL_KEYWORDS: &[&'static str] = &[ $($ident),* ]; }; @@ -651,7 +651,7 @@ pub const RESERVED_FOR_COLUMN_ALIAS: &[Keyword] = &[ /// Can't be used as a column or table name in PostgreSQL. /// /// This list is taken from the following table, for all "reserved" words in the PostgreSQL column, -/// includinhg "can be function or type" and "requires AS". +/// including "can be function or type" and "requires AS". /// /// `SELECT` and `WITH` were commented out because the following won't parse: /// `SELECT (SELECT 1)` or `SELECT (WITH a AS (SELECT 1) SELECT 1)`