diff --git a/.cargo/config.toml b/.cargo/config.toml index 9efb7a9c5c07..215eeab77090 100644 --- a/.cargo/config.toml +++ b/.cargo/config.toml @@ -3,13 +3,3 @@ linker = "aarch64-linux-gnu-gcc" [alias] sqlness = "run --bin sqlness-runner --" - - -[build] -rustflags = [ - # lints - # TODO: use lint configuration in cargo https://github.com/rust-lang/cargo/issues/5034 - "-Wclippy::print_stdout", - "-Wclippy::print_stderr", - "-Wclippy::implicit_clone", -] diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index bdda82fd9a2a..c9c516c576f5 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -61,6 +61,18 @@ jobs: sqlness: name: Sqlness Test - runs-on: ubuntu-20.04 + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ ubuntu-20.04 ] + steps: + - run: 'echo "No action required"' + + sqlness-kafka-wal: + name: Sqlness Test with Kafka Wal + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ ubuntu-20.04 ] steps: - run: 'echo "No action required"' diff --git a/Cargo.lock b/Cargo.lock index 450afc7dc02e..519b7af08e94 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1768,8 +1768,10 @@ dependencies = [ "arc-swap", "async-trait", "chrono-tz 0.6.3", + "common-catalog", "common-error", "common-macro", + "common-meta", "common-query", "common-runtime", "common-telemetry", @@ -1784,6 +1786,7 @@ dependencies = [ "paste", "ron", "serde", + "serde_json", "session", "snafu", "statrs", @@ -3406,6 +3409,7 @@ dependencies = [ "datatypes", "hydroflow", "itertools 0.10.5", + "num-traits", "serde", "servers", "session", @@ -6062,6 +6066,7 @@ dependencies = [ "meter-macros", "object-store", "partition", + "path-slash", "prometheus", "query", "regex", @@ -6336,6 +6341,12 @@ version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c" +[[package]] +name = "path-slash" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e91099d4268b0e11973f036e885d652fb0b21fedcf69738c627f94db6a44f42" + [[package]] name = "pathdiff" version = "0.2.1" diff --git a/Cargo.toml b/Cargo.toml index 9f4e54d13748..1525f0c9fbbd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -66,6 +66,11 @@ version = "0.6.0" edition = "2021" license = "Apache-2.0" +[workspace.lints] +clippy.print_stdout = "warn" +clippy.print_stderr = "warn" +clippy.implicit_clone = "warn" + [workspace.dependencies] ahash = { version = "0.8", features = ["compile-time-rng"] } aquamarine = "0.3" diff --git a/benchmarks/Cargo.toml b/benchmarks/Cargo.toml index d56524fe36a9..c2fddd474ab1 100644 --- a/benchmarks/Cargo.toml +++ b/benchmarks/Cargo.toml @@ -4,6 +4,9 @@ version.workspace = true edition.workspace = true license.workspace = true +[lints] +workspace = true + [dependencies] arrow.workspace = true chrono.workspace = true diff --git a/src/api/Cargo.toml b/src/api/Cargo.toml index 9beea1ff51b9..d3390e6c213f 100644 --- a/src/api/Cargo.toml +++ b/src/api/Cargo.toml @@ -4,6 +4,9 @@ version.workspace = true edition.workspace = true license.workspace = true +[lints] +workspace = true + [dependencies] common-base.workspace = true common-decimal.workspace = true diff --git a/src/auth/Cargo.toml b/src/auth/Cargo.toml index 748da3b40f8b..d50b63fcfcbf 100644 --- a/src/auth/Cargo.toml +++ b/src/auth/Cargo.toml @@ -8,6 +8,9 @@ license.workspace = true default = [] testing = [] +[lints] +workspace = true + [dependencies] api.workspace = true async-trait.workspace = true diff --git a/src/catalog/Cargo.toml
b/src/catalog/Cargo.toml index b3355dd2d9c6..e16eadfdf8c1 100644 --- a/src/catalog/Cargo.toml +++ b/src/catalog/Cargo.toml @@ -7,6 +7,9 @@ license.workspace = true [features] testing = [] +[lints] +workspace = true + [dependencies] api.workspace = true arc-swap = "1.0" diff --git a/src/catalog/src/error.rs b/src/catalog/src/error.rs index 11cb3df96b71..cf0008ad802b 100644 --- a/src/catalog/src/error.rs +++ b/src/catalog/src/error.rs @@ -164,11 +164,8 @@ pub enum Error { location: Location, }, - #[snafu(display("Failed to find table partitions: #{table}"))] - FindPartitions { - source: partition::error::Error, - table: String, - }, + #[snafu(display("Failed to find table partitions"))] + FindPartitions { source: partition::error::Error }, #[snafu(display("Failed to find region routes"))] FindRegionRoutes { source: partition::error::Error }, diff --git a/src/catalog/src/information_schema/partitions.rs b/src/catalog/src/information_schema/partitions.rs index ecf23f8cc9ce..e7b80e2342a1 100644 --- a/src/catalog/src/information_schema/partitions.rs +++ b/src/catalog/src/information_schema/partitions.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +use core::pin::pin; use std::sync::{Arc, Weak}; use arrow_schema::SchemaRef as ArrowSchemaRef; @@ -31,7 +32,7 @@ use datatypes::vectors::{ ConstantVector, DateTimeVector, DateTimeVectorBuilder, Int64Vector, Int64VectorBuilder, MutableVector, StringVector, StringVectorBuilder, UInt64VectorBuilder, }; -use futures::TryStreamExt; +use futures::{StreamExt, TryStreamExt}; use partition::manager::PartitionInfo; use partition::partition::PartitionDef; use snafu::{OptionExt, ResultExt}; @@ -240,40 +241,64 @@ impl InformationSchemaPartitionsBuilder { let predicates = Predicates::from_scan_request(&request); for schema_name in catalog_manager.schema_names(&catalog_name).await? { - let mut stream = catalog_manager.tables(&catalog_name, &schema_name).await; - - while let Some(table) = stream.try_next().await? { - let table_info = table.table_info(); - - if table_info.table_type == TableType::Temporary { - continue; - } - - let table_id = table_info.ident.table_id; - let partitions = if let Some(partition_manager) = &partition_manager { + let table_info_stream = catalog_manager + .tables(&catalog_name, &schema_name) + .await + .try_filter_map(|t| async move { + let table_info = t.table_info(); + if table_info.table_type == TableType::Temporary { + Ok(None) + } else { + Ok(Some(table_info)) + } + }); + + const BATCH_SIZE: usize = 128; + + // Split table infos into chunks + let mut table_info_chunks = pin!(table_info_stream.ready_chunks(BATCH_SIZE)); + + while let Some(table_infos) = table_info_chunks.next().await { + let table_infos = table_infos.into_iter().collect::<Result<Vec<_>>>()?; + let table_ids: Vec<TableId> = table_infos.iter().map(|info| info.ident.table_id).collect(); + + let mut table_partitions = if let Some(partition_manager) = &partition_manager { partition_manager - .find_table_partitions(table_id) + .batch_find_table_partitions(&table_ids) .await - .context(FindPartitionsSnafu { - table: &table_info.name, - })? + .context(FindPartitionsSnafu)? } else { // Current node must be a standalone instance, contains only one partition by default. // TODO(dennis): change it when we support multi-regions for standalone.
- vec![PartitionInfo { - id: RegionId::new(table_id, 0), - partition: PartitionDef::new(vec![], vec![]), - }] + table_ids + .into_iter() + .map(|table_id| { + ( + table_id, + vec![PartitionInfo { + id: RegionId::new(table_id, 0), + partition: PartitionDef::new(vec![], vec![]), + }], + ) + }) + .collect() }; - self.add_partitions( - &predicates, - &table_info, - &catalog_name, - &schema_name, - &table_info.name, - &partitions, - ); + for table_info in table_infos { + let partitions = table_partitions + .remove(&table_info.ident.table_id) + .unwrap_or(vec![]); + + self.add_partitions( + &predicates, + &table_info, + &catalog_name, + &schema_name, + &table_info.name, + &partitions, + ); + } } } diff --git a/src/catalog/src/information_schema/region_peers.rs b/src/catalog/src/information_schema/region_peers.rs index 882ad263092c..9a436ab7f7b7 100644 --- a/src/catalog/src/information_schema/region_peers.rs +++ b/src/catalog/src/information_schema/region_peers.rs @@ -199,7 +199,7 @@ impl InformationSchemaRegionPeersBuilder { let table_routes = if let Some(partition_manager) = &partition_manager { partition_manager - .find_region_routes_batch(&table_ids) + .batch_find_region_routes(&table_ids) .await .context(FindRegionRoutesSnafu)? } else { diff --git a/src/client/Cargo.toml b/src/client/Cargo.toml index ddd13b887dbd..914c808c6661 100644 --- a/src/client/Cargo.toml +++ b/src/client/Cargo.toml @@ -7,6 +7,9 @@ license.workspace = true [features] testing = [] +[lints] +workspace = true + [dependencies] api.workspace = true arc-swap = "1.6" diff --git a/src/cmd/Cargo.toml b/src/cmd/Cargo.toml index 01fa554cb537..d6e11b68a376 100644 --- a/src/cmd/Cargo.toml +++ b/src/cmd/Cargo.toml @@ -12,6 +12,9 @@ path = "src/bin/greptime.rs" [features] tokio-console = ["common-telemetry/tokio-console"] +[lints] +workspace = true + [dependencies] anymap = "1.0.0-beta.2" async-trait.workspace = true diff --git a/src/cmd/src/cli/repl.rs b/src/cmd/src/cli/repl.rs index 6eba5059512b..a6c581122459 100644 --- a/src/cmd/src/cli/repl.rs +++ b/src/cmd/src/cli/repl.rs @@ -260,6 +260,7 @@ async fn create_query_engine(meta_addr: &str) -> Result { catalog_list, None, None, + None, false, plugins.clone(), )); diff --git a/src/cmd/src/standalone.rs b/src/cmd/src/standalone.rs index 944f8623e017..edd262e9c29a 100644 --- a/src/cmd/src/standalone.rs +++ b/src/cmd/src/standalone.rs @@ -22,7 +22,7 @@ use common_config::{metadata_store_dir, KvBackendConfig}; use common_meta::cache_invalidator::DummyCacheInvalidator; use common_meta::datanode_manager::DatanodeManagerRef; use common_meta::ddl::table_meta::{TableMetadataAllocator, TableMetadataAllocatorRef}; -use common_meta::ddl::DdlTaskExecutorRef; +use common_meta::ddl::ProcedureExecutorRef; use common_meta::ddl_manager::DdlManager; use common_meta::key::{TableMetadataManager, TableMetadataManagerRef}; use common_meta::kv_backend::KvBackendRef; @@ -459,8 +459,8 @@ impl StartCommand { procedure_manager: ProcedureManagerRef, datanode_manager: DatanodeManagerRef, table_meta_allocator: TableMetadataAllocatorRef, - ) -> Result { - let ddl_task_executor: DdlTaskExecutorRef = Arc::new( + ) -> Result { + let procedure_executor: ProcedureExecutorRef = Arc::new( DdlManager::try_new( procedure_manager, datanode_manager, @@ -472,7 +472,7 @@ impl StartCommand { .context(InitDdlManagerSnafu)?, ); - Ok(ddl_task_executor) + Ok(procedure_executor) } pub async fn create_table_metadata_manager( diff --git a/src/common/base/Cargo.toml b/src/common/base/Cargo.toml index 54926afcc421..db8d13e5e56f 100644 
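The `information_schema.partitions` change above replaces the per-table `find_table_partitions` call with one batched lookup per chunk of tables, using `ready_chunks` on the table-info stream. A minimal sketch of that batching pattern, using a plain stream of ids instead of the real catalog stream (names and the batch size are illustrative, and it assumes the `futures` crate):

```rust
use futures::{executor::block_on, stream, StreamExt};

fn main() {
    block_on(async {
        const BATCH_SIZE: usize = 128;
        // Stand-in for the stream of table infos produced by the catalog manager.
        let table_ids = stream::iter(0u32..300);
        // `ready_chunks(n)` yields Vecs of up to `n` already-available items,
        // so each chunk can be resolved with a single batched call instead of
        // one lookup per table.
        let mut chunks = std::pin::pin!(table_ids.ready_chunks(BATCH_SIZE));
        while let Some(batch) = chunks.next().await {
            println!("resolving {} tables in one batched call", batch.len());
        }
    });
}
```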
--- a/src/common/base/Cargo.toml +++ b/src/common/base/Cargo.toml @@ -4,6 +4,9 @@ version.workspace = true edition.workspace = true license.workspace = true +[lints] +workspace = true + [dependencies] anymap = "1.0.0-beta.2" bitvec = "1.0" diff --git a/src/common/catalog/Cargo.toml b/src/common/catalog/Cargo.toml index 3acac32fe0c0..61f49ab0e4b3 100644 --- a/src/common/catalog/Cargo.toml +++ b/src/common/catalog/Cargo.toml @@ -4,6 +4,9 @@ version.workspace = true edition.workspace = true license.workspace = true +[lints] +workspace = true + [dependencies] common-error.workspace = true common-macro.workspace = true diff --git a/src/common/config/Cargo.toml b/src/common/config/Cargo.toml index 829c2ee53b0c..dccd1f2df4a3 100644 --- a/src/common/config/Cargo.toml +++ b/src/common/config/Cargo.toml @@ -4,6 +4,9 @@ version.workspace = true edition.workspace = true license.workspace = true +[lints] +workspace = true + [dependencies] common-base.workspace = true humantime-serde.workspace = true diff --git a/src/common/datasource/Cargo.toml b/src/common/datasource/Cargo.toml index 1e456b40c860..8f11043afd73 100644 --- a/src/common/datasource/Cargo.toml +++ b/src/common/datasource/Cargo.toml @@ -4,6 +4,9 @@ version.workspace = true edition.workspace = true license.workspace = true +[lints] +workspace = true + [dependencies] arrow.workspace = true arrow-schema.workspace = true diff --git a/src/common/decimal/Cargo.toml b/src/common/decimal/Cargo.toml index adf9b08446a8..671364d49fae 100644 --- a/src/common/decimal/Cargo.toml +++ b/src/common/decimal/Cargo.toml @@ -4,6 +4,9 @@ version.workspace = true edition.workspace = true license.workspace = true +[lints] +workspace = true + [dependencies] arrow.workspace = true bigdecimal.workspace = true diff --git a/src/common/error/Cargo.toml b/src/common/error/Cargo.toml index ab157787cab2..92ab12dd07f5 100644 --- a/src/common/error/Cargo.toml +++ b/src/common/error/Cargo.toml @@ -4,6 +4,9 @@ version.workspace = true edition.workspace = true license.workspace = true +[lints] +workspace = true + [dependencies] snafu.workspace = true strum.workspace = true diff --git a/src/common/error/src/lib.rs b/src/common/error/src/lib.rs index aa3c915e84e3..aa54ef39e78f 100644 --- a/src/common/error/src/lib.rs +++ b/src/common/error/src/lib.rs @@ -19,7 +19,9 @@ pub mod format; pub mod mock; pub mod status_code; +pub use snafu; + +// HACK - these headers are defined here to be shared by gRPC services. For common HTTP headers, +// please define them in `src/servers/src/http/header.rs`.
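For context on the `[workspace.lints]` table and the per-crate `[lints] workspace = true` stanzas added throughout this patch: they replace the old `rustflags` entries in `.cargo/config.toml`. A small, hypothetical snippet of the kind of code those clippy lints are meant to flag once the workspace lints apply (function and variable names are illustrative):

```rust
// With `clippy.print_stdout = "warn"` and `clippy.implicit_clone = "warn"`
// enabled workspace-wide, both marked lines below produce warnings.
fn names_copy(names: &Vec<String>) -> Vec<String> {
    println!("copying {} names", names.len()); // clippy::print_stdout
    names.to_vec() // clippy::implicit_clone: `.to_vec()` here is just `.clone()`
}

fn main() {
    let copied = names_copy(&vec!["a".to_string(), "b".to_string()]);
    assert_eq!(copied.len(), 2);
}
```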
pub const GREPTIME_DB_HEADER_ERROR_CODE: &str = "x-greptime-err-code"; pub const GREPTIME_DB_HEADER_ERROR_MSG: &str = "x-greptime-err-msg"; - -pub use snafu; diff --git a/src/common/function/Cargo.toml b/src/common/function/Cargo.toml index 7053d10771ad..93fef8593cc1 100644 --- a/src/common/function/Cargo.toml +++ b/src/common/function/Cargo.toml @@ -4,13 +4,18 @@ edition.workspace = true version.workspace = true license.workspace = true +[lints] +workspace = true + [dependencies] api.workspace = true arc-swap = "1.0" async-trait.workspace = true chrono-tz = "0.6" +common-catalog.workspace = true common-error.workspace = true common-macro.workspace = true +common-meta.workspace = true common-query.workspace = true common-runtime.workspace = true common-telemetry.workspace = true @@ -23,6 +28,8 @@ num = "0.4" num-traits = "0.2" once_cell.workspace = true paste = "1.0" +serde.workspace = true +serde_json.workspace = true session.workspace = true snafu.workspace = true statrs = "0.16" diff --git a/src/common/function/src/function.rs b/src/common/function/src/function.rs index f47486da4502..8da7f344bbc3 100644 --- a/src/common/function/src/function.rs +++ b/src/common/function/src/function.rs @@ -30,6 +30,17 @@ pub struct FunctionContext { pub state: Arc, } +impl FunctionContext { + /// Create a mock [`FunctionContext`] for test. + #[cfg(any(test, feature = "testing"))] + pub fn mock() -> Self { + Self { + query_ctx: QueryContextBuilder::default().build(), + state: Arc::new(FunctionState::mock()), + } + } +} + impl Default for FunctionContext { fn default() -> Self { Self { diff --git a/src/common/function/src/handlers.rs b/src/common/function/src/handlers.rs index 352009fc78b0..629f55e32235 100644 --- a/src/common/function/src/handlers.rs +++ b/src/common/function/src/handlers.rs @@ -13,10 +13,9 @@ // limitations under the License. use std::sync::Arc; -use std::time::Duration; -use api::v1::meta::ProcedureStateResponse; use async_trait::async_trait; +use common_meta::rpc::procedure::{MigrateRegionRequest, ProcedureStateResponse}; use common_query::error::Result; use session::context::QueryContextRef; use table::requests::{DeleteRequest, InsertRequest}; @@ -31,24 +30,18 @@ pub trait TableMutationHandler: Send + Sync { /// Delete rows from the table. async fn delete(&self, request: DeleteRequest, ctx: QueryContextRef) -> Result; - - /// Migrate a region from source peer to target peer, returns the procedure id if success. - async fn migrate_region( - &self, - region_id: u64, - from_peer: u64, - to_peer: u64, - replay_timeout: Duration, - ) -> Result; } -/// A trait for handling meta service requests in `QueryEngine`. +/// A trait for handling procedure service requests in `QueryEngine`. #[async_trait] -pub trait MetaServiceHandler: Send + Sync { +pub trait ProcedureServiceHandler: Send + Sync { + /// Migrate a region from source peer to target peer, returns the procedure id if success. + async fn migrate_region(&self, request: MigrateRegionRequest) -> Result>; + /// Query the procedure' state by its id async fn query_procedure_state(&self, pid: &str) -> Result; } pub type TableMutationHandlerRef = Arc; -pub type MetaServiceHandlerRef = Arc; +pub type ProcedureServiceHandlerRef = Arc; diff --git a/src/common/function/src/lib.rs b/src/common/function/src/lib.rs index 10fbf13a7a05..1d37d7068c98 100644 --- a/src/common/function/src/lib.rs +++ b/src/common/function/src/lib.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
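The `handlers.rs` change above splits region migration and procedure queries out of `TableMutationHandler` into a dedicated `ProcedureServiceHandler`, stored behind an `Arc<dyn …>` type alias. A simplified, self-contained sketch of that trait-object pattern (the trait, error type, and handler here are stand-ins rather than the real GreptimeDB APIs; it assumes the `async-trait` and `futures` crates):

```rust
use std::sync::Arc;

#[async_trait::async_trait]
trait ProcedureServiceHandler: Send + Sync {
    async fn query_procedure_state(&self, pid: &str) -> Result<String, String>;
}

// The query engine keeps the handler as an optional trait object, so tests and
// the CLI can run without one while real deployments plug an implementation in.
type ProcedureServiceHandlerRef = Arc<dyn ProcedureServiceHandler>;

struct NoopHandler;

#[async_trait::async_trait]
impl ProcedureServiceHandler for NoopHandler {
    async fn query_procedure_state(&self, pid: &str) -> Result<String, String> {
        Ok(format!("procedure {pid}: Done"))
    }
}

fn main() {
    let handler: Option<ProcedureServiceHandlerRef> = Some(Arc::new(NoopHandler));
    let state = futures::executor::block_on(
        handler.as_ref().unwrap().query_procedure_state("test_pid"),
    )
    .unwrap();
    assert_eq!(state, "procedure test_pid: Done");
}
```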
+mod macros; pub mod scalars; mod system; mod table; diff --git a/src/common/function/src/macros.rs b/src/common/function/src/macros.rs new file mode 100644 index 000000000000..c8b03e816301 --- /dev/null +++ b/src/common/function/src/macros.rs @@ -0,0 +1,27 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/// Ensure the current function is invoked under the `greptime` catalog. +#[macro_export] +macro_rules! ensure_greptime { + ($func_ctx: expr) => {{ + use common_catalog::consts::DEFAULT_CATALOG_NAME; + snafu::ensure!( + $func_ctx.query_ctx.current_catalog() == DEFAULT_CATALOG_NAME, + common_query::error::PermissionDeniedSnafu { + err_msg: format!("current catalog is not {DEFAULT_CATALOG_NAME}") + } + ); + }}; +} diff --git a/src/common/function/src/state.rs b/src/common/function/src/state.rs index a5a4935cddac..418509dc52e9 100644 --- a/src/common/function/src/state.rs +++ b/src/common/function/src/state.rs @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -use crate::handlers::{MetaServiceHandlerRef, TableMutationHandlerRef}; +use crate::handlers::{ProcedureServiceHandlerRef, TableMutationHandlerRef}; /// Shared state for SQL functions. /// The handlers in state may be `None` in cli command-line or test cases. @@ -20,6 +20,45 @@ use crate::handlers::{MetaServiceHandlerRef, TableMutationHandlerRef}; pub struct FunctionState { // The table mutation handler pub table_mutation_handler: Option<TableMutationHandlerRef>, - // The meta service handler - pub meta_service_handler: Option<MetaServiceHandlerRef>, + // The procedure service handler + pub procedure_service_handler: Option<ProcedureServiceHandlerRef>, } + +impl FunctionState { + /// Create a mock [`FunctionState`] for test.
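The new `ensure_greptime!` macro above rejects function calls made outside the default `greptime` catalog. A standalone sketch of the check it expands to, with a simplified context and a plain string error standing in for `QueryContextRef` and `PermissionDeniedSnafu`:

```rust
const DEFAULT_CATALOG_NAME: &str = "greptime";

struct QueryCtx {
    catalog: String,
}

// Equivalent of the `snafu::ensure!` guard on the current catalog.
fn ensure_greptime(ctx: &QueryCtx) -> Result<(), String> {
    if ctx.catalog != DEFAULT_CATALOG_NAME {
        return Err(format!(
            "Permission denied: current catalog is not {DEFAULT_CATALOG_NAME}"
        ));
    }
    Ok(())
}

fn main() {
    assert!(ensure_greptime(&QueryCtx { catalog: "greptime".into() }).is_ok());
    assert!(ensure_greptime(&QueryCtx { catalog: "other".into() }).is_err());
}
```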
+ #[cfg(any(test, feature = "testing"))] + pub fn mock() -> Self { + use std::sync::Arc; + + use api::v1::meta::ProcedureStatus; + use async_trait::async_trait; + use common_meta::rpc::procedure::{MigrateRegionRequest, ProcedureStateResponse}; + use common_query::error::Result; + + use crate::handlers::ProcedureServiceHandler; + struct MockProcedureServiceHandler; + + #[async_trait] + impl ProcedureServiceHandler for MockProcedureServiceHandler { + async fn migrate_region( + &self, + _request: MigrateRegionRequest, + ) -> Result> { + Ok(Some("test_pid".to_string())) + } + + async fn query_procedure_state(&self, _pid: &str) -> Result { + Ok(ProcedureStateResponse { + status: ProcedureStatus::Done.into(), + error: "OK".to_string(), + ..Default::default() + }) + } + } + + Self { + table_mutation_handler: None, + procedure_service_handler: Some(Arc::new(MockProcedureServiceHandler)), + } + } } diff --git a/src/common/function/src/system.rs b/src/common/function/src/system.rs index 94beda6966f9..b50dbfba07b6 100644 --- a/src/common/function/src/system.rs +++ b/src/common/function/src/system.rs @@ -14,6 +14,7 @@ mod build; mod database; +mod procedure_state; mod timezone; mod version; @@ -21,6 +22,7 @@ use std::sync::Arc; use build::BuildFunction; use database::DatabaseFunction; +use procedure_state::ProcedureStateFunction; use timezone::TimezoneFunction; use version::VersionFunction; @@ -34,5 +36,6 @@ impl SystemFunction { registry.register(Arc::new(VersionFunction)); registry.register(Arc::new(DatabaseFunction)); registry.register(Arc::new(TimezoneFunction)); + registry.register(Arc::new(ProcedureStateFunction)); } } diff --git a/src/common/function/src/system/build.rs b/src/common/function/src/system/build.rs index ce9e77fdfb7c..925b262bcdb6 100644 --- a/src/common/function/src/system/build.rs +++ b/src/common/function/src/system/build.rs @@ -22,7 +22,7 @@ use datatypes::vectors::{StringVector, VectorRef}; use crate::function::{Function, FunctionContext}; -/// Generates build information +/// Generates build information #[derive(Clone, Debug, Default)] pub struct BuildFunction; @@ -42,11 +42,7 @@ impl Function for BuildFunction { } fn signature(&self) -> Signature { - Signature::uniform( - 0, - vec![ConcreteDataType::string_datatype()], - Volatility::Immutable, - ) + Signature::uniform(0, vec![], Volatility::Immutable) } fn eval(&self, _func_ctx: FunctionContext, _columns: &[VectorRef]) -> Result { @@ -75,7 +71,7 @@ mod tests { Signature { type_signature: TypeSignature::Uniform(0, valid_types), volatility: Volatility::Immutable - } if valid_types == vec![ConcreteDataType::string_datatype()] + } if valid_types.is_empty() )); let build_info = common_version::build_info().to_string(); let vector = build.eval(FunctionContext::default(), &[]).unwrap(); diff --git a/src/common/function/src/system/procedure_state.rs b/src/common/function/src/system/procedure_state.rs new file mode 100644 index 000000000000..4f6305078465 --- /dev/null +++ b/src/common/function/src/system/procedure_state.rs @@ -0,0 +1,216 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +use std::fmt; +use std::sync::Arc; + +use api::v1::meta::ProcedureStatus; +use common_meta::rpc::procedure::ProcedureStateResponse; +use common_query::error::Error::ThreadJoin; +use common_query::error::{ + InvalidFuncArgsSnafu, MissingProcedureServiceHandlerSnafu, Result, + UnsupportedInputDataTypeSnafu, +}; +use common_query::prelude::{Signature, Volatility}; +use common_telemetry::error; +use datatypes::prelude::*; +use datatypes::vectors::{ConstantVector, Helper, StringVector, VectorRef}; +use serde::Serialize; +use snafu::{ensure, Location, OptionExt}; + +use crate::function::{Function, FunctionContext}; + +const NAME: &str = "procedure_state"; + +/// A function to query procedure state by its id. +/// Such as `procedure_state(pid)`. +#[derive(Clone, Debug, Default)] +pub struct ProcedureStateFunction; + +impl fmt::Display for ProcedureStateFunction { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "PROCEDURE_STATE") + } +} + +#[derive(Serialize)] +struct ProcedureStateJson { + status: String, + #[serde(skip_serializing_if = "Option::is_none")] + error: Option, +} + +impl Function for ProcedureStateFunction { + fn name(&self) -> &str { + NAME + } + + fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result { + Ok(ConcreteDataType::string_datatype()) + } + + fn signature(&self) -> Signature { + Signature::uniform( + 1, + vec![ConcreteDataType::string_datatype()], + Volatility::Immutable, + ) + } + + fn eval(&self, func_ctx: FunctionContext, columns: &[VectorRef]) -> Result { + crate::ensure_greptime!(func_ctx); + + ensure!( + columns.len() == 1, + InvalidFuncArgsSnafu { + err_msg: format!( + "The length of the args is not correct, expect 1, have: {}", + columns.len() + ), + } + ); + + let pids = columns[0].clone(); + let expect_len = pids.len(); + let is_const = pids.is_const(); + + match pids.data_type() { + ConcreteDataType::String(_) => { + // TODO(dennis): datafusion UDF doesn't support async function currently + std::thread::spawn(move || { + let pids: &StringVector = if is_const { + let pids: &ConstantVector = unsafe { Helper::static_cast(&pids) }; + unsafe { Helper::static_cast(pids.inner()) } + } else { + unsafe { Helper::static_cast(&pids) } + }; + + let procedure_service_handler = func_ctx + .state + .procedure_service_handler + .as_ref() + .context(MissingProcedureServiceHandlerSnafu)?; + + let states = pids + .iter_data() + .map(|pid| { + if let Some(pid) = pid { + let ProcedureStateResponse { status, error, .. } = + common_runtime::block_on_read(async move { + procedure_service_handler.query_procedure_state(pid).await + })?; + + let status = ProcedureStatus::try_from(status) + .map(|v| v.as_str_name()) + .unwrap_or("Unknown"); + + let state = ProcedureStateJson { + status: status.to_string(), + error: if error.is_empty() { None } else { Some(error) }, + }; + + Ok(Some(serde_json::to_string(&state).unwrap_or_default())) + } else { + Ok(None) + } + }) + .collect::>>()?; + + let results: VectorRef = Arc::new(StringVector::from(states)); + + if is_const { + Ok(Arc::new(ConstantVector::new(results, expect_len)) as _) + } else { + Ok(results) + } + }) + .join() + .map_err(|e| { + error!(e; "Join thread error"); + ThreadJoin { + location: Location::default(), + } + })? 
+ } + _ => UnsupportedInputDataTypeSnafu { + function: NAME, + datatypes: columns.iter().map(|c| c.data_type()).collect::>(), + } + .fail(), + } + } +} + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use common_query::prelude::TypeSignature; + use datatypes::vectors::StringVector; + + use super::*; + + #[test] + fn test_procedure_state_misc() { + let f = ProcedureStateFunction; + assert_eq!("procedure_state", f.name()); + assert_eq!( + ConcreteDataType::string_datatype(), + f.return_type(&[]).unwrap() + ); + assert!(matches!(f.signature(), + Signature { + type_signature: TypeSignature::Uniform(1, valid_types), + volatility: Volatility::Immutable + } if valid_types == vec![ConcreteDataType::string_datatype()] + )); + } + + #[test] + fn test_missing_procedure_service() { + let f = ProcedureStateFunction; + + let args = vec!["pid"]; + + let args = args + .into_iter() + .map(|arg| Arc::new(StringVector::from_slice(&[arg])) as _) + .collect::>(); + + let result = f.eval(FunctionContext::default(), &args).unwrap_err(); + assert_eq!( + "Missing ProcedureServiceHandler, not expected", + result.to_string() + ); + } + + #[test] + fn test_procedure_state() { + let f = ProcedureStateFunction; + + let args = vec!["pid"]; + + let args = args + .into_iter() + .map(|arg| Arc::new(StringVector::from_slice(&[arg])) as _) + .collect::>(); + + let result = f.eval(FunctionContext::mock(), &args).unwrap(); + + let expect: VectorRef = Arc::new(StringVector::from(vec![ + "{\"status\":\"Done\",\"error\":\"OK\"}", + ])); + assert_eq!(expect, result); + } +} diff --git a/src/common/function/src/table/migrate_region.rs b/src/common/function/src/table/migrate_region.rs index f969bada02d3..6447c6de6b3d 100644 --- a/src/common/function/src/table/migrate_region.rs +++ b/src/common/function/src/table/migrate_region.rs @@ -15,9 +15,10 @@ use std::fmt::{self}; use std::time::Duration; +use common_meta::rpc::procedure::MigrateRegionRequest; use common_query::error::Error::ThreadJoin; use common_query::error::{ - InvalidFuncArgsSnafu, InvalidInputTypeSnafu, MissingTableMutationHandlerSnafu, Result, + InvalidFuncArgsSnafu, InvalidInputTypeSnafu, MissingProcedureServiceHandlerSnafu, Result, }; use common_query::prelude::{Signature, TypeSignature, Volatility}; use common_telemetry::logging::error; @@ -77,6 +78,8 @@ impl Function for MigrateRegionFunction { } fn eval(&self, func_ctx: FunctionContext, columns: &[VectorRef]) -> Result { + crate::ensure_greptime!(func_ctx); + let (region_ids, from_peers, to_peers, replay_timeouts) = match columns.len() { 3 => { let region_ids = cast_u64_vector(&columns[0])?; @@ -106,9 +109,15 @@ impl Function for MigrateRegionFunction { } }; + // TODO(dennis): datafusion UDF doesn't support async function currently std::thread::spawn(move || { let len = region_ids.len(); let mut results = StringVectorBuilder::with_capacity(len); + let procedure_service_handler = func_ctx + .state + .procedure_service_handler + .as_ref() + .context(MissingProcedureServiceHandlerSnafu)?; for index in 0..len { let region_id = region_ids.get(index); @@ -126,24 +135,18 @@ impl Function for MigrateRegionFunction { Value::UInt64(to_peer), Value::UInt64(replay_timeout), ) => { - let func_ctx = func_ctx.clone(); - let pid = common_runtime::block_on_read(async move { - func_ctx - .state - .table_mutation_handler - .as_ref() - .context(MissingTableMutationHandlerSnafu)? 
- .migrate_region( + procedure_service_handler + .migrate_region(MigrateRegionRequest { region_id, from_peer, to_peer, - Duration::from_secs(replay_timeout), - ) + replay_timeout: Duration::from_secs(replay_timeout), + }) .await })?; - results.push(Some(&pid)); + results.push(pid.as_deref()) } _ => { results.push(None); @@ -171,5 +174,60 @@ impl fmt::Display for MigrateRegionFunction { #[cfg(test)] mod tests { - // FIXME(dennis): test in the following PR. + use std::sync::Arc; + + use common_query::prelude::TypeSignature; + use datatypes::vectors::{StringVector, UInt64Vector}; + + use super::*; + + #[test] + fn test_migrate_region_misc() { + let f = MigrateRegionFunction; + assert_eq!("migrate_region", f.name()); + assert_eq!( + ConcreteDataType::string_datatype(), + f.return_type(&[]).unwrap() + ); + assert!(matches!(f.signature(), + Signature { + type_signature: TypeSignature::OneOf(sigs), + volatility: Volatility::Immutable + } if sigs.len() == 2)); + } + + #[test] + fn test_missing_procedure_service() { + let f = MigrateRegionFunction; + + let args = vec![1, 1, 1]; + + let args = args + .into_iter() + .map(|arg| Arc::new(UInt64Vector::from_slice([arg])) as _) + .collect::>(); + + let result = f.eval(FunctionContext::default(), &args).unwrap_err(); + assert_eq!( + "Missing ProcedureServiceHandler, not expected", + result.to_string() + ); + } + + #[test] + fn test_migrate_region() { + let f = MigrateRegionFunction; + + let args = vec![1, 1, 1]; + + let args = args + .into_iter() + .map(|arg| Arc::new(UInt64Vector::from_slice([arg])) as _) + .collect::>(); + + let result = f.eval(FunctionContext::mock(), &args).unwrap(); + + let expect: VectorRef = Arc::new(StringVector::from(vec!["test_pid"])); + assert_eq!(expect, result); + } } diff --git a/src/common/greptimedb-telemetry/Cargo.toml b/src/common/greptimedb-telemetry/Cargo.toml index 991f40890ee5..6bff7a719e91 100644 --- a/src/common/greptimedb-telemetry/Cargo.toml +++ b/src/common/greptimedb-telemetry/Cargo.toml @@ -4,6 +4,9 @@ version.workspace = true edition.workspace = true license.workspace = true +[lints] +workspace = true + [dependencies] async-trait.workspace = true common-error.workspace = true diff --git a/src/common/grpc-expr/Cargo.toml b/src/common/grpc-expr/Cargo.toml index d8ae57980cef..3415b54200b3 100644 --- a/src/common/grpc-expr/Cargo.toml +++ b/src/common/grpc-expr/Cargo.toml @@ -4,6 +4,9 @@ version.workspace = true edition.workspace = true license.workspace = true +[lints] +workspace = true + [dependencies] api.workspace = true async-trait.workspace = true diff --git a/src/common/grpc/Cargo.toml b/src/common/grpc/Cargo.toml index 9c71d5786039..0f11d6873158 100644 --- a/src/common/grpc/Cargo.toml +++ b/src/common/grpc/Cargo.toml @@ -4,6 +4,9 @@ version.workspace = true edition.workspace = true license.workspace = true +[lints] +workspace = true + [dependencies] api.workspace = true arrow-flight.workspace = true diff --git a/src/common/macro/Cargo.toml b/src/common/macro/Cargo.toml index 1ba1c2bbc8c5..64080739aa1e 100644 --- a/src/common/macro/Cargo.toml +++ b/src/common/macro/Cargo.toml @@ -7,6 +7,9 @@ license.workspace = true [lib] proc-macro = true +[lints] +workspace = true + [dependencies] proc-macro2 = "1.0.66" quote = "1.0" diff --git a/src/common/mem-prof/Cargo.toml b/src/common/mem-prof/Cargo.toml index c30b6334df6e..666565264508 100644 --- a/src/common/mem-prof/Cargo.toml +++ b/src/common/mem-prof/Cargo.toml @@ -4,6 +4,9 @@ version.workspace = true edition.workspace = true license.workspace = true +[lints] 
+workspace = true + [dependencies] common-error.workspace = true common-macro.workspace = true diff --git a/src/common/meta/Cargo.toml b/src/common/meta/Cargo.toml index c6feeddacb87..554b0d6d795d 100644 --- a/src/common/meta/Cargo.toml +++ b/src/common/meta/Cargo.toml @@ -7,6 +7,9 @@ license.workspace = true [features] testing = [] +[lints] +workspace = true + [dependencies] api.workspace = true async-recursion = "1.0" diff --git a/src/common/meta/src/ddl.rs b/src/common/meta/src/ddl.rs index 4a8335ef3087..d5d790f95838 100644 --- a/src/common/meta/src/ddl.rs +++ b/src/common/meta/src/ddl.rs @@ -26,6 +26,7 @@ use crate::key::table_route::TableRouteValue; use crate::key::TableMetadataManagerRef; use crate::region_keeper::MemoryRegionKeeperRef; use crate::rpc::ddl::{SubmitDdlTaskRequest, SubmitDdlTaskResponse}; +use crate::rpc::procedure::{MigrateRegionRequest, MigrateRegionResponse, ProcedureStateResponse}; pub mod alter_table; pub mod create_logical_tables; @@ -46,16 +47,32 @@ pub struct ExecutorContext { pub tracing_context: Option, } +/// The procedure executor that accepts ddl, region migration task etc. #[async_trait::async_trait] -pub trait DdlTaskExecutor: Send + Sync { +pub trait ProcedureExecutor: Send + Sync { + /// Submit a ddl task async fn submit_ddl_task( &self, ctx: &ExecutorContext, request: SubmitDdlTaskRequest, ) -> Result; + + /// Submit a region migration task + async fn migrate_region( + &self, + ctx: &ExecutorContext, + request: MigrateRegionRequest, + ) -> Result; + + /// Query the procedure state by its id + async fn query_procedure_state( + &self, + ctx: &ExecutorContext, + pid: &str, + ) -> Result; } -pub type DdlTaskExecutorRef = Arc; +pub type ProcedureExecutorRef = Arc; pub struct TableMetadataAllocatorContext { pub cluster_id: u64, diff --git a/src/common/meta/src/ddl_manager.rs b/src/common/meta/src/ddl_manager.rs index efad73dae6a3..4a760cafcee3 100644 --- a/src/common/meta/src/ddl_manager.rs +++ b/src/common/meta/src/ddl_manager.rs @@ -28,10 +28,10 @@ use crate::ddl::create_table::CreateTableProcedure; use crate::ddl::drop_table::DropTableProcedure; use crate::ddl::table_meta::TableMetadataAllocatorRef; use crate::ddl::truncate_table::TruncateTableProcedure; -use crate::ddl::{utils, DdlContext, DdlTaskExecutor, ExecutorContext}; +use crate::ddl::{utils, DdlContext, ExecutorContext, ProcedureExecutor}; use crate::error::{ self, EmptyCreateTableTasksSnafu, ProcedureOutputSnafu, RegisterProcedureLoaderSnafu, Result, - SubmitProcedureSnafu, TableNotFoundSnafu, WaitProcedureSnafu, + SubmitProcedureSnafu, TableNotFoundSnafu, UnsupportedSnafu, WaitProcedureSnafu, }; use crate::key::table_info::TableInfoValue; use crate::key::table_name::TableNameKey; @@ -46,6 +46,8 @@ use crate::rpc::ddl::{ AlterTableTask, CreateTableTask, DropTableTask, SubmitDdlTaskRequest, SubmitDdlTaskResponse, TruncateTableTask, }; +use crate::rpc::procedure; +use crate::rpc::procedure::{MigrateRegionRequest, MigrateRegionResponse, ProcedureStateResponse}; use crate::rpc::router::RegionRoute; use crate::table_name::TableName; use crate::ClusterId; @@ -527,8 +529,9 @@ async fn handle_create_logical_table_tasks( }) } +/// TODO(dennis): let [`DdlManager`] implement [`ProcedureExecutor`] looks weird, find some way to refactor it. 
#[async_trait::async_trait] -impl DdlTaskExecutor for DdlManager { +impl ProcedureExecutor for DdlManager { async fn submit_ddl_task( &self, ctx: &ExecutorContext, @@ -566,6 +569,37 @@ impl DdlTaskExecutor for DdlManager { .trace(span) .await } + + async fn migrate_region( + &self, + _ctx: &ExecutorContext, + _request: MigrateRegionRequest, + ) -> Result { + UnsupportedSnafu { + operation: "migrate_region", + } + .fail() + } + + async fn query_procedure_state( + &self, + _ctx: &ExecutorContext, + pid: &str, + ) -> Result { + let pid = ProcedureId::parse_str(pid) + .with_context(|_| error::ParseProcedureIdSnafu { key: pid })?; + + let state = self + .procedure_manager + .procedure_state(pid) + .await + .context(error::QueryProcedureSnafu)? + .context(error::ProcedureNotFoundSnafu { + pid: pid.to_string(), + })?; + + Ok(procedure::procedure_state_to_pb_response(&state)) + } } #[cfg(test)] diff --git a/src/common/meta/src/error.rs b/src/common/meta/src/error.rs index dc4c0cf51cec..32af562e30f8 100644 --- a/src/common/meta/src/error.rs +++ b/src/common/meta/src/error.rs @@ -100,6 +100,15 @@ pub enum Error { source: common_procedure::Error, }, + #[snafu(display("Failed to query procedure"))] + QueryProcedure { + location: Location, + source: common_procedure::Error, + }, + + #[snafu(display("Procedure not found: {pid}"))] + ProcedureNotFound { location: Location, pid: String }, + #[snafu(display("Failed to parse procedure id: {key}"))] ParseProcedureId { location: Location, @@ -431,14 +440,17 @@ impl ErrorExt for Error { | RenameTable { .. } | Unsupported { .. } => StatusCode::Internal, - PrimaryKeyNotFound { .. } | EmptyKey { .. } | InvalidEngineType { .. } => { - StatusCode::InvalidArguments - } + ProcedureNotFound { .. } + | PrimaryKeyNotFound { .. } + | EmptyKey { .. } + | InvalidEngineType { .. } => StatusCode::InvalidArguments, TableNotFound { .. } => StatusCode::TableNotFound, TableAlreadyExists { .. } => StatusCode::TableAlreadyExists, - SubmitProcedure { source, .. } | WaitProcedure { source, .. } => source.status_code(), + SubmitProcedure { source, .. } + | QueryProcedure { source, .. } + | WaitProcedure { source, .. } => source.status_code(), RegisterProcedureLoader { source, .. } => source.status_code(), External { source, .. } => source.status_code(), OperateDatanode { source, .. } => source.status_code(), diff --git a/src/common/meta/src/rpc/procedure.rs b/src/common/meta/src/rpc/procedure.rs index 9e64edb715c8..b4de8747df21 100644 --- a/src/common/meta/src/rpc/procedure.rs +++ b/src/common/meta/src/rpc/procedure.rs @@ -12,6 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::time::Duration; + +pub use api::v1::meta::{MigrateRegionResponse, ProcedureStateResponse}; use api::v1::meta::{ ProcedureId as PbProcedureId, ProcedureStateResponse as PbProcedureStateResponse, ProcedureStatus as PbProcedureStatus, @@ -21,6 +24,15 @@ use snafu::ResultExt; use crate::error::{ParseProcedureIdSnafu, Result}; +/// A request to migrate region. +#[derive(Clone)] +pub struct MigrateRegionRequest { + pub region_id: u64, + pub from_peer: u64, + pub to_peer: u64, + pub replay_timeout: Duration, +} + /// Cast the protobuf [`ProcedureId`] to common [`ProcedureId`]. 
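The new `MigrateRegionRequest` above bundles everything `migrate_region` needs into one value. A minimal sketch of constructing it (the struct is re-declared here so the snippet stands alone; the ids and timeout are illustrative values, not taken from the patch):

```rust
use std::time::Duration;

#[derive(Clone, Debug)]
pub struct MigrateRegionRequest {
    pub region_id: u64,
    pub from_peer: u64,
    pub to_peer: u64,
    pub replay_timeout: Duration,
}

fn main() {
    let request = MigrateRegionRequest {
        // Illustrative region id: a GreptimeDB RegionId packs the table id into
        // the high 32 bits and the region number into the low 32 bits.
        region_id: 1024u64 << 32,
        from_peer: 1,
        to_peer: 2,
        replay_timeout: Duration::from_secs(10),
    };
    println!("{request:?}");
}
```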
pub fn pb_pid_to_pid(pid: &PbProcedureId) -> Result { ProcedureId::parse_str(&String::from_utf8_lossy(&pid.key)).with_context(|_| { diff --git a/src/common/plugins/Cargo.toml b/src/common/plugins/Cargo.toml index 8fa1069158b4..92dc34201474 100644 --- a/src/common/plugins/Cargo.toml +++ b/src/common/plugins/Cargo.toml @@ -4,6 +4,7 @@ version.workspace = true edition.workspace = true license.workspace = true -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[lints] +workspace = true [dependencies] diff --git a/src/common/procedure-test/Cargo.toml b/src/common/procedure-test/Cargo.toml index 93c379148b4e..2e7674727056 100644 --- a/src/common/procedure-test/Cargo.toml +++ b/src/common/procedure-test/Cargo.toml @@ -4,6 +4,9 @@ version.workspace = true edition.workspace = true license.workspace = true +[lints] +workspace = true + [dependencies] async-trait.workspace = true common-procedure.workspace = true diff --git a/src/common/procedure/Cargo.toml b/src/common/procedure/Cargo.toml index 795df7eea9ea..af755b2b87f3 100644 --- a/src/common/procedure/Cargo.toml +++ b/src/common/procedure/Cargo.toml @@ -7,6 +7,9 @@ license.workspace = true [features] testing = [] +[lints] +workspace = true + [dependencies] async-stream.workspace = true async-trait.workspace = true diff --git a/src/common/query/Cargo.toml b/src/common/query/Cargo.toml index 0232722499ac..443640016488 100644 --- a/src/common/query/Cargo.toml +++ b/src/common/query/Cargo.toml @@ -4,6 +4,9 @@ version.workspace = true edition.workspace = true license.workspace = true +[lints] +workspace = true + [dependencies] api.workspace = true async-trait.workspace = true diff --git a/src/common/query/src/error.rs b/src/common/query/src/error.rs index 49d8f35e39f1..758ec214b6a1 100644 --- a/src/common/query/src/error.rs +++ b/src/common/query/src/error.rs @@ -178,14 +178,23 @@ pub enum Error { location: Location, }, + #[snafu(display("Failed to do procedure task"))] + ProcedureService { + source: BoxedError, + location: Location, + }, + #[snafu(display("Missing TableMutationHandler, not expected"))] MissingTableMutationHandler { location: Location }, - #[snafu(display("Missing MetaServiceHandler, not expected"))] - MissingMetaServiceHandler { location: Location }, + #[snafu(display("Missing ProcedureServiceHandler, not expected"))] + MissingProcedureServiceHandler { location: Location }, #[snafu(display("Invalid function args: {}", err_msg))] InvalidFuncArgs { err_msg: String, location: Location }, + + #[snafu(display("Permission denied: {}", err_msg))] + PermissionDenied { err_msg: String, location: Location }, } pub type Result = std::result::Result; @@ -213,7 +222,7 @@ impl ErrorExt for Error { | Error::FromArrowArray { source, .. } => source.status_code(), Error::MissingTableMutationHandler { .. } - | Error::MissingMetaServiceHandler { .. } + | Error::MissingProcedureServiceHandler { .. } | Error::ExecuteRepeatedly { .. } | Error::ThreadJoin { .. } | Error::GeneralDataFusion { .. } => StatusCode::Unexpected, @@ -225,7 +234,11 @@ impl ErrorExt for Error { Error::ConvertDfRecordBatchStream { source, .. } => source.status_code(), Error::ExecutePhysicalPlan { source, .. } => source.status_code(), Error::Execute { source, .. } => source.status_code(), - Error::TableMutation { source, .. } => source.status_code(), + Error::ProcedureService { source, .. } | Error::TableMutation { source, .. } => { + source.status_code() + } + + Error::PermissionDenied { .. 
} => StatusCode::PermissionDenied, } } diff --git a/src/common/recordbatch/Cargo.toml b/src/common/recordbatch/Cargo.toml index ac91e934cc7d..01d051067b94 100644 --- a/src/common/recordbatch/Cargo.toml +++ b/src/common/recordbatch/Cargo.toml @@ -4,6 +4,9 @@ version.workspace = true edition.workspace = true license.workspace = true +[lints] +workspace = true + [dependencies] arc-swap = "1.6" common-base.workspace = true diff --git a/src/common/runtime/Cargo.toml b/src/common/runtime/Cargo.toml index 76dc0aa9ffa0..a6da1f571fc2 100644 --- a/src/common/runtime/Cargo.toml +++ b/src/common/runtime/Cargo.toml @@ -4,6 +4,9 @@ version.workspace = true edition.workspace = true license.workspace = true +[lints] +workspace = true + [dependencies] async-trait.workspace = true common-error.workspace = true diff --git a/src/common/substrait/Cargo.toml b/src/common/substrait/Cargo.toml index 891cba1d833f..1c4f4047977b 100644 --- a/src/common/substrait/Cargo.toml +++ b/src/common/substrait/Cargo.toml @@ -4,6 +4,9 @@ version.workspace = true edition.workspace = true license.workspace = true +[lints] +workspace = true + [dependencies] async-recursion = "1.0" async-trait.workspace = true diff --git a/src/common/telemetry/Cargo.toml b/src/common/telemetry/Cargo.toml index d25e89e6575f..6a8dc96b460b 100644 --- a/src/common/telemetry/Cargo.toml +++ b/src/common/telemetry/Cargo.toml @@ -8,6 +8,9 @@ license.workspace = true tokio-console = ["console-subscriber", "tokio/tracing"] deadlock_detection = ["parking_lot/deadlock_detection"] +[lints] +workspace = true + [dependencies] atty = "0.2" backtrace = "0.3" diff --git a/src/common/test-util/Cargo.toml b/src/common/test-util/Cargo.toml index 310fb45a7e35..2b66dd45ce3a 100644 --- a/src/common/test-util/Cargo.toml +++ b/src/common/test-util/Cargo.toml @@ -4,6 +4,9 @@ version.workspace = true edition.workspace = true license.workspace = true +[lints] +workspace = true + [dependencies] client.workspace = true common-query.workspace = true diff --git a/src/common/time/Cargo.toml b/src/common/time/Cargo.toml index 032520ffee18..fdd06140f187 100644 --- a/src/common/time/Cargo.toml +++ b/src/common/time/Cargo.toml @@ -4,6 +4,9 @@ version.workspace = true edition.workspace = true license.workspace = true +[lints] +workspace = true + [dependencies] arrow.workspace = true chrono.workspace = true diff --git a/src/common/version/Cargo.toml b/src/common/version/Cargo.toml index 7dd296e0d507..a766329843bf 100644 --- a/src/common/version/Cargo.toml +++ b/src/common/version/Cargo.toml @@ -4,5 +4,8 @@ version.workspace = true edition.workspace = true license.workspace = true +[lints] +workspace = true + [dependencies] build-data = "0.1.4" diff --git a/src/common/wal/Cargo.toml b/src/common/wal/Cargo.toml index b70ef3740e0e..3b84673bb1ee 100644 --- a/src/common/wal/Cargo.toml +++ b/src/common/wal/Cargo.toml @@ -7,6 +7,9 @@ license.workspace = true [features] testing = [] +[lints] +workspace = true + [dependencies] common-base.workspace = true common-telemetry.workspace = true diff --git a/src/datanode/Cargo.toml b/src/datanode/Cargo.toml index dc50c905f40a..38fe11613139 100644 --- a/src/datanode/Cargo.toml +++ b/src/datanode/Cargo.toml @@ -7,6 +7,9 @@ license.workspace = true [features] testing = [] +[lints] +workspace = true + [dependencies] api.workspace = true arrow-flight.workspace = true diff --git a/src/datanode/src/datanode.rs b/src/datanode/src/datanode.rs index 7480b2cfa73f..2d36e53eed8b 100644 --- a/src/datanode/src/datanode.rs +++ b/src/datanode/src/datanode.rs @@ -310,6 
+310,7 @@ impl DatanodeBuilder { MemoryCatalogManager::with_default_setup(), None, None, + None, false, self.plugins.clone(), ); diff --git a/src/datatypes/Cargo.toml b/src/datatypes/Cargo.toml index ce40c640d492..ee8254627260 100644 --- a/src/datatypes/Cargo.toml +++ b/src/datatypes/Cargo.toml @@ -8,6 +8,9 @@ license.workspace = true default = [] test = [] +[lints] +workspace = true + [dependencies] arrow.workspace = true arrow-array.workspace = true diff --git a/src/file-engine/Cargo.toml b/src/file-engine/Cargo.toml index f0938c545bad..3ce83b6b791c 100644 --- a/src/file-engine/Cargo.toml +++ b/src/file-engine/Cargo.toml @@ -8,6 +8,9 @@ license.workspace = true default = [] test = ["common-test-util"] +[lints] +workspace = true + [dependencies] api = { workspace = true, optional = true } async-trait = "0.1" diff --git a/src/flow/Cargo.toml b/src/flow/Cargo.toml index f20aa5d07e4c..0dc614a6c06c 100644 --- a/src/flow/Cargo.toml +++ b/src/flow/Cargo.toml @@ -4,6 +4,9 @@ version.workspace = true edition.workspace = true license.workspace = true +[lints] +workspace = true + [dependencies] api.workspace = true bimap = "0.6.3" @@ -17,6 +20,7 @@ common-time.workspace = true datatypes.workspace = true hydroflow = "0.5.0" itertools.workspace = true +num-traits = "0.2" serde.workspace = true servers.workspace = true session.workspace = true diff --git a/src/flow/src/expr/error.rs b/src/flow/src/expr/error.rs index 0fd58ba1cf8f..233538fb6564 100644 --- a/src/flow/src/expr/error.rs +++ b/src/flow/src/expr/error.rs @@ -58,4 +58,7 @@ pub enum EvalError { #[snafu(display("Optimize error: {reason}"))] Optimize { reason: String, location: Location }, + + #[snafu(display("Unsupported temporal filter: {reason}"))] + UnsupportedTemporalFilter { reason: String, location: Location }, } diff --git a/src/flow/src/expr/func.rs b/src/flow/src/expr/func.rs index eed43f65a759..85a127f09a4d 100644 --- a/src/flow/src/expr/func.rs +++ b/src/flow/src/expr/func.rs @@ -21,14 +21,12 @@ use hydroflow::bincode::Error; use serde::{Deserialize, Serialize}; use snafu::ResultExt; -use super::ScalarExpr; -use crate::expr::error::CastValueSnafu; -use crate::expr::InvalidArgumentSnafu; -// TODO(discord9): more function & eval -use crate::{ - expr::error::{EvalError, TryFromValueSnafu, TypeMismatchSnafu}, - repr::Row, +use crate::expr::error::{ + CastValueSnafu, DivisionByZeroSnafu, EvalError, InternalSnafu, TryFromValueSnafu, + TypeMismatchSnafu, }; +use crate::expr::{InvalidArgumentSnafu, ScalarExpr}; +use crate::repr::Row; /// UnmaterializableFunc is a function that can't be eval independently, /// and require special handling @@ -47,6 +45,66 @@ pub enum UnaryFunc { StepTimestamp, Cast(ConcreteDataType), } + +impl UnaryFunc { + pub fn eval(&self, values: &[Value], expr: &ScalarExpr) -> Result { + let arg = expr.eval(values)?; + match self { + Self::Not => { + let bool = if let Value::Boolean(bool) = arg { + Ok(bool) + } else { + TypeMismatchSnafu { + expected: ConcreteDataType::boolean_datatype(), + actual: arg.data_type(), + } + .fail()? + }?; + Ok(Value::from(!bool)) + } + Self::IsNull => Ok(Value::from(arg.is_null())), + Self::IsTrue | Self::IsFalse => { + let bool = if let Value::Boolean(bool) = arg { + Ok(bool) + } else { + TypeMismatchSnafu { + expected: ConcreteDataType::boolean_datatype(), + actual: arg.data_type(), + } + .fail()? 
+ }?; + if matches!(self, Self::IsTrue) { + Ok(Value::from(bool)) + } else { + Ok(Value::from(!bool)) + } + } + Self::StepTimestamp => { + if let Value::DateTime(datetime) = arg { + let datetime = DateTime::from(datetime.val() + 1); + Ok(Value::from(datetime)) + } else { + TypeMismatchSnafu { + expected: ConcreteDataType::datetime_datatype(), + actual: arg.data_type(), + } + .fail()? + } + } + Self::Cast(to) => { + let arg_ty = arg.data_type(); + let res = cast(arg, to).context({ + CastValueSnafu { + from: arg_ty, + to: to.clone(), + } + })?; + Ok(res) + } + } + } +} + /// TODO(discord9): support more binary functions for more types #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Deserialize, Serialize, Hash)] pub enum BinaryFunc { @@ -96,8 +154,232 @@ pub enum BinaryFunc { ModUInt64, } +impl BinaryFunc { + pub fn eval( + &self, + values: &[Value], + expr1: &ScalarExpr, + expr2: &ScalarExpr, + ) -> Result { + let left = expr1.eval(values)?; + let right = expr2.eval(values)?; + match self { + Self::Eq => Ok(Value::from(left == right)), + Self::NotEq => Ok(Value::from(left != right)), + Self::Lt => Ok(Value::from(left < right)), + Self::Lte => Ok(Value::from(left <= right)), + Self::Gt => Ok(Value::from(left > right)), + Self::Gte => Ok(Value::from(left >= right)), + + Self::AddInt16 => Ok(add::(left, right)?), + Self::AddInt32 => Ok(add::(left, right)?), + Self::AddInt64 => Ok(add::(left, right)?), + Self::AddUInt16 => Ok(add::(left, right)?), + Self::AddUInt32 => Ok(add::(left, right)?), + Self::AddUInt64 => Ok(add::(left, right)?), + Self::AddFloat32 => Ok(add::(left, right)?), + Self::AddFloat64 => Ok(add::(left, right)?), + + Self::SubInt16 => Ok(sub::(left, right)?), + Self::SubInt32 => Ok(sub::(left, right)?), + Self::SubInt64 => Ok(sub::(left, right)?), + Self::SubUInt16 => Ok(sub::(left, right)?), + Self::SubUInt32 => Ok(sub::(left, right)?), + Self::SubUInt64 => Ok(sub::(left, right)?), + Self::SubFloat32 => Ok(sub::(left, right)?), + Self::SubFloat64 => Ok(sub::(left, right)?), + + Self::MulInt16 => Ok(mul::(left, right)?), + Self::MulInt32 => Ok(mul::(left, right)?), + Self::MulInt64 => Ok(mul::(left, right)?), + Self::MulUInt16 => Ok(mul::(left, right)?), + Self::MulUInt32 => Ok(mul::(left, right)?), + Self::MulUInt64 => Ok(mul::(left, right)?), + Self::MulFloat32 => Ok(mul::(left, right)?), + Self::MulFloat64 => Ok(mul::(left, right)?), + + Self::DivInt16 => Ok(div::(left, right)?), + Self::DivInt32 => Ok(div::(left, right)?), + Self::DivInt64 => Ok(div::(left, right)?), + Self::DivUInt16 => Ok(div::(left, right)?), + Self::DivUInt32 => Ok(div::(left, right)?), + Self::DivUInt64 => Ok(div::(left, right)?), + Self::DivFloat32 => Ok(div::(left, right)?), + Self::DivFloat64 => Ok(div::(left, right)?), + + Self::ModInt16 => Ok(rem::(left, right)?), + Self::ModInt32 => Ok(rem::(left, right)?), + Self::ModInt64 => Ok(rem::(left, right)?), + Self::ModUInt16 => Ok(rem::(left, right)?), + Self::ModUInt32 => Ok(rem::(left, right)?), + Self::ModUInt64 => Ok(rem::(left, right)?), + } + } + + /// Reverse the comparison operator, i.e. `a < b` becomes `b > a`, + /// equal and not equal are unchanged. 
+ pub fn reverse_compare(&self) -> Result { + let ret = match &self { + BinaryFunc::Eq => BinaryFunc::Eq, + BinaryFunc::NotEq => BinaryFunc::NotEq, + BinaryFunc::Lt => BinaryFunc::Gt, + BinaryFunc::Lte => BinaryFunc::Gte, + BinaryFunc::Gt => BinaryFunc::Lt, + BinaryFunc::Gte => BinaryFunc::Lte, + _ => { + return InternalSnafu { + reason: format!("Expect a comparison operator, found {:?}", self), + } + .fail(); + } + }; + Ok(ret) + } +} + #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Deserialize, Serialize, Hash)] pub enum VariadicFunc { And, Or, } + +impl VariadicFunc { + pub fn eval(&self, values: &[Value], exprs: &[ScalarExpr]) -> Result { + match self { + VariadicFunc::And => and(values, exprs), + VariadicFunc::Or => or(values, exprs), + } + } +} + +fn and(values: &[Value], exprs: &[ScalarExpr]) -> Result { + // If any is false, then return false. Else, if any is null, then return null. Else, return true. + let mut null = false; + for expr in exprs { + match expr.eval(values) { + Ok(Value::Boolean(true)) => {} + Ok(Value::Boolean(false)) => return Ok(Value::Boolean(false)), // short-circuit + Ok(Value::Null) => null = true, + Err(this_err) => { + return Err(this_err); + } // retain first error encountered + Ok(x) => InvalidArgumentSnafu { + reason: format!( + "`and()` only support boolean type, found value {:?} of type {:?}", + x, + x.data_type() + ), + } + .fail()?, + } + } + match null { + true => Ok(Value::Null), + false => Ok(Value::Boolean(true)), + } +} + +fn or(values: &[Value], exprs: &[ScalarExpr]) -> Result { + // If any is false, then return false. Else, if any is null, then return null. Else, return true. + let mut null = false; + for expr in exprs { + match expr.eval(values) { + Ok(Value::Boolean(true)) => return Ok(Value::Boolean(true)), // short-circuit + Ok(Value::Boolean(false)) => {} + Ok(Value::Null) => null = true, + Err(this_err) => { + return Err(this_err); + } // retain first error encountered + Ok(x) => InvalidArgumentSnafu { + reason: format!( + "`or()` only support boolean type, found value {:?} of type {:?}", + x, + x.data_type() + ), + } + .fail()?, + } + } + match null { + true => Ok(Value::Null), + false => Ok(Value::Boolean(false)), + } +} + +fn add(left: Value, right: Value) -> Result +where + T: TryFrom + num_traits::Num, + Value: From, +{ + let left = T::try_from(left).map_err(|e| TryFromValueSnafu { msg: e.to_string() }.build())?; + let right = T::try_from(right).map_err(|e| TryFromValueSnafu { msg: e.to_string() }.build())?; + Ok(Value::from(left + right)) +} + +fn sub(left: Value, right: Value) -> Result +where + T: TryFrom + num_traits::Num, + Value: From, +{ + let left = T::try_from(left).map_err(|e| TryFromValueSnafu { msg: e.to_string() }.build())?; + let right = T::try_from(right).map_err(|e| TryFromValueSnafu { msg: e.to_string() }.build())?; + Ok(Value::from(left - right)) +} + +fn mul(left: Value, right: Value) -> Result +where + T: TryFrom + num_traits::Num, + Value: From, +{ + let left = T::try_from(left).map_err(|e| TryFromValueSnafu { msg: e.to_string() }.build())?; + let right = T::try_from(right).map_err(|e| TryFromValueSnafu { msg: e.to_string() }.build())?; + Ok(Value::from(left * right)) +} + +fn div(left: Value, right: Value) -> Result +where + T: TryFrom + num_traits::Num, + >::Error: std::fmt::Debug, + Value: From, +{ + let left = T::try_from(left).map_err(|e| TryFromValueSnafu { msg: e.to_string() }.build())?; + let right = T::try_from(right).map_err(|e| TryFromValueSnafu { msg: e.to_string() }.build())?; + if 
right.is_zero() { + return Err(DivisionByZeroSnafu {}.build()); + } + Ok(Value::from(left / right)) +} + +fn rem(left: Value, right: Value) -> Result +where + T: TryFrom + num_traits::Num, + >::Error: std::fmt::Debug, + Value: From, +{ + let left = T::try_from(left).map_err(|e| TryFromValueSnafu { msg: e.to_string() }.build())?; + let right = T::try_from(right).map_err(|e| TryFromValueSnafu { msg: e.to_string() }.build())?; + Ok(Value::from(left % right)) +} + +#[test] +fn test_num_ops() { + let left = Value::from(10); + let right = Value::from(3); + let res = add::(left.clone(), right.clone()).unwrap(); + assert_eq!(res, Value::from(13)); + let res = sub::(left.clone(), right.clone()).unwrap(); + assert_eq!(res, Value::from(7)); + let res = mul::(left.clone(), right.clone()).unwrap(); + assert_eq!(res, Value::from(30)); + let res = div::(left.clone(), right.clone()).unwrap(); + assert_eq!(res, Value::from(3)); + let res = rem::(left.clone(), right.clone()).unwrap(); + assert_eq!(res, Value::from(1)); + + let values = vec![Value::from(true), Value::from(false)]; + let exprs = vec![ScalarExpr::Column(0), ScalarExpr::Column(1)]; + let res = and(&values, &exprs).unwrap(); + assert_eq!(res, Value::from(false)); + let res = or(&values, &exprs).unwrap(); + assert_eq!(res, Value::from(true)); +} diff --git a/src/flow/src/expr/scalar.rs b/src/flow/src/expr/scalar.rs index 3c1d745a8616..fa03bb9f1912 100644 --- a/src/flow/src/expr/scalar.rs +++ b/src/flow/src/expr/scalar.rs @@ -18,7 +18,9 @@ use datatypes::prelude::ConcreteDataType; use datatypes::value::Value; use serde::{Deserialize, Serialize}; -use crate::expr::error::{EvalError, InvalidArgumentSnafu, OptimizeSnafu}; +use crate::expr::error::{ + EvalError, InvalidArgumentSnafu, OptimizeSnafu, UnsupportedTemporalFilterSnafu, +}; use crate::expr::func::{BinaryFunc, UnaryFunc, UnmaterializableFunc, VariadicFunc}; /// A scalar expression, which can be evaluated to a value. @@ -59,3 +61,338 @@ pub enum ScalarExpr { els: Box, }, } + +impl ScalarExpr { + pub fn call_unary(self, func: UnaryFunc) -> Self { + ScalarExpr::CallUnary { + func, + expr: Box::new(self), + } + } + + pub fn call_binary(self, other: Self, func: BinaryFunc) -> Self { + ScalarExpr::CallBinary { + func, + expr1: Box::new(self), + expr2: Box::new(other), + } + } + + pub fn eval(&self, values: &[Value]) -> Result { + match self { + ScalarExpr::Column(index) => Ok(values[*index].clone()), + ScalarExpr::Literal(row_res, _ty) => Ok(row_res.clone()), + ScalarExpr::CallUnmaterializable(f) => OptimizeSnafu { + reason: "Can't eval unmaterializable function".to_string(), + } + .fail(), + ScalarExpr::CallUnary { func, expr } => func.eval(values, expr), + ScalarExpr::CallBinary { func, expr1, expr2 } => func.eval(values, expr1, expr2), + ScalarExpr::CallVariadic { func, exprs } => func.eval(values, exprs), + ScalarExpr::If { cond, then, els } => match cond.eval(values) { + Ok(Value::Boolean(true)) => then.eval(values), + Ok(Value::Boolean(false)) => els.eval(values), + _ => InvalidArgumentSnafu { + reason: "if condition must be boolean".to_string(), + } + .fail(), + }, + } + } + + /// Rewrites column indices with their value in `permutation`. + /// + /// This method is applicable even when `permutation` is not a + /// strict permutation, and it only needs to have entries for + /// each column referenced in `self`. 
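The `add`/`sub`/`mul`/`div`/`rem` helpers above are generic over `num_traits::Num` and convert their operands from the engine's `Value` type. A self-contained sketch of the same generic pattern, using plain primitives and a string error in place of `Value` and `EvalError`:

```rust
use num_traits::Num;

// Generic division with an explicit divide-by-zero check, mirroring the shape
// of the helpers above but without the Value conversions.
fn div<T: Num + Copy>(left: T, right: T) -> Result<T, String> {
    if right.is_zero() {
        return Err("division by zero".to_string());
    }
    Ok(left / right)
}

fn rem<T: Num + Copy>(left: T, right: T) -> Result<T, String> {
    if right.is_zero() {
        return Err("division by zero".to_string());
    }
    Ok(left % right)
}

fn main() {
    assert_eq!(div(10i64, 3).unwrap(), 3);
    assert_eq!(rem(10i64, 3).unwrap(), 1);
    assert!(div(1i64, 0).is_err());
}
```

In this sketch `rem` also guards against a zero divisor, since `%` on integer types panics in that case.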
+ pub fn permute(&mut self, permutation: &[usize]) { + self.visit_mut_post_nolimit(&mut |e| { + if let ScalarExpr::Column(old_i) = e { + *old_i = permutation[*old_i]; + } + }); + } + + /// Rewrites column indices with their value in `permutation`. + /// + /// This method is applicable even when `permutation` is not a + /// strict permutation, and it only needs to have entries for + /// each column referenced in `self`. + pub fn permute_map(&mut self, permutation: &BTreeMap) { + self.visit_mut_post_nolimit(&mut |e| { + if let ScalarExpr::Column(old_i) = e { + *old_i = permutation[old_i]; + } + }); + } + + /// Returns the set of columns that are referenced by `self`. + pub fn get_all_ref_columns(&self) -> BTreeSet { + let mut support = BTreeSet::new(); + self.visit_post_nolimit(&mut |e| { + if let ScalarExpr::Column(i) = e { + support.insert(*i); + } + }); + support + } + + pub fn as_literal(&self) -> Option { + if let ScalarExpr::Literal(lit, _column_type) = self { + Some(lit.clone()) + } else { + None + } + } + + pub fn is_literal(&self) -> bool { + matches!(self, ScalarExpr::Literal(..)) + } + + pub fn is_literal_true(&self) -> bool { + Some(Value::Boolean(true)) == self.as_literal() + } + + pub fn is_literal_false(&self) -> bool { + Some(Value::Boolean(false)) == self.as_literal() + } + + pub fn is_literal_null(&self) -> bool { + Some(Value::Null) == self.as_literal() + } + + pub fn literal_null() -> Self { + ScalarExpr::Literal(Value::Null, ConcreteDataType::null_datatype()) + } + + pub fn literal(res: Value, typ: ConcreteDataType) -> Self { + ScalarExpr::Literal(res, typ) + } + + pub fn literal_false() -> Self { + ScalarExpr::Literal(Value::Boolean(false), ConcreteDataType::boolean_datatype()) + } + + pub fn literal_true() -> Self { + ScalarExpr::Literal(Value::Boolean(true), ConcreteDataType::boolean_datatype()) + } +} + +impl ScalarExpr { + /// visit post-order without stack call limit, but may cause stack overflow + fn visit_post_nolimit(&self, f: &mut F) + where + F: FnMut(&Self), + { + self.visit_children(|e| e.visit_post_nolimit(f)); + f(self); + } + + fn visit_children(&self, mut f: F) + where + F: FnMut(&Self), + { + match self { + ScalarExpr::Column(_) + | ScalarExpr::Literal(_, _) + | ScalarExpr::CallUnmaterializable(_) => (), + ScalarExpr::CallUnary { expr, .. } => f(expr), + ScalarExpr::CallBinary { expr1, expr2, .. } => { + f(expr1); + f(expr2); + } + ScalarExpr::CallVariadic { exprs, .. } => { + for expr in exprs { + f(expr); + } + } + ScalarExpr::If { cond, then, els } => { + f(cond); + f(then); + f(els); + } + } + } + + fn visit_mut_post_nolimit(&mut self, f: &mut F) + where + F: FnMut(&mut Self), + { + self.visit_mut_children(|e: &mut Self| e.visit_mut_post_nolimit(f)); + f(self); + } + + fn visit_mut_children(&mut self, mut f: F) + where + F: FnMut(&mut Self), + { + match self { + ScalarExpr::Column(_) + | ScalarExpr::Literal(_, _) + | ScalarExpr::CallUnmaterializable(_) => (), + ScalarExpr::CallUnary { expr, .. } => f(expr), + ScalarExpr::CallBinary { expr1, expr2, .. } => { + f(expr1); + f(expr2); + } + ScalarExpr::CallVariadic { exprs, .. 
} => { + for expr in exprs { + f(expr); + } + } + ScalarExpr::If { cond, then, els } => { + f(cond); + f(then); + f(els); + } + } + } +} + +impl ScalarExpr { + /// if expr contains function `Now` + pub fn contains_temporal(&self) -> bool { + let mut contains = false; + self.visit_post_nolimit(&mut |e| { + if let ScalarExpr::CallUnmaterializable(UnmaterializableFunc::Now) = e { + contains = true; + } + }); + contains + } + + /// extract lower or upper bound of `Now` for expr, where `lower bound <= expr < upper bound` + /// + /// returned bool indicates whether the bound is upper bound: + /// + /// false for lower bound, true for upper bound + /// TODO(discord9): allow simple transform like `now() + a < b` to `now() < b - a` + pub fn extract_bound(&self) -> Result<(Option, Option), EvalError> { + let unsupported_err = |msg: &str| { + UnsupportedTemporalFilterSnafu { + reason: msg.to_string(), + } + .fail() + }; + + let Self::CallBinary { + mut func, + mut expr1, + mut expr2, + } = self.clone() + else { + return unsupported_err("Not a binary expression"); + }; + + // TODO: support simple transform like `now() + a < b` to `now() < b - a` + + let expr1_is_now = *expr1 == ScalarExpr::CallUnmaterializable(UnmaterializableFunc::Now); + let expr2_is_now = *expr2 == ScalarExpr::CallUnmaterializable(UnmaterializableFunc::Now); + + if !(expr1_is_now ^ expr2_is_now) { + return unsupported_err("None of the sides of the comparison is `now()`"); + } + + if expr2_is_now { + std::mem::swap(&mut expr1, &mut expr2); + func = BinaryFunc::reverse_compare(&func)?; + } + + let step = |expr: ScalarExpr| expr.call_unary(UnaryFunc::StepTimestamp); + match func { + // now == expr2 -> now <= expr2 && now < expr2 + 1 + BinaryFunc::Eq => Ok((Some(*expr2.clone()), Some(step(*expr2)))), + // now < expr2 -> now < expr2 + BinaryFunc::Lt => Ok((None, Some(*expr2))), + // now <= expr2 -> now < expr2 + 1 + BinaryFunc::Lte => Ok((None, Some(step(*expr2)))), + // now > expr2 -> now >= expr2 + 1 + BinaryFunc::Gt => Ok((Some(step(*expr2)), None)), + // now >= expr2 -> now >= expr2 + BinaryFunc::Gte => Ok((Some(*expr2), None)), + _ => unreachable!("Already checked"), + } + } +} + +#[cfg(test)] +mod test { + use super::*; + #[test] + fn test_extract_bound() { + let test_list: [(ScalarExpr, Result<_, EvalError>); 5] = [ + // col(0) == now + ( + ScalarExpr::CallBinary { + func: BinaryFunc::Eq, + expr1: Box::new(ScalarExpr::CallUnmaterializable(UnmaterializableFunc::Now)), + expr2: Box::new(ScalarExpr::Column(0)), + }, + Ok(( + Some(ScalarExpr::Column(0)), + Some(ScalarExpr::CallUnary { + func: UnaryFunc::StepTimestamp, + expr: Box::new(ScalarExpr::Column(0)), + }), + )), + ), + // now < col(0) + ( + ScalarExpr::CallBinary { + func: BinaryFunc::Lt, + expr1: Box::new(ScalarExpr::CallUnmaterializable(UnmaterializableFunc::Now)), + expr2: Box::new(ScalarExpr::Column(0)), + }, + Ok((None, Some(ScalarExpr::Column(0)))), + ), + // now <= col(0) + ( + ScalarExpr::CallBinary { + func: BinaryFunc::Lte, + expr1: Box::new(ScalarExpr::CallUnmaterializable(UnmaterializableFunc::Now)), + expr2: Box::new(ScalarExpr::Column(0)), + }, + Ok(( + None, + Some(ScalarExpr::CallUnary { + func: UnaryFunc::StepTimestamp, + expr: Box::new(ScalarExpr::Column(0)), + }), + )), + ), + // now > col(0) -> now >= col(0) + 1 + ( + ScalarExpr::CallBinary { + func: BinaryFunc::Gt, + expr1: Box::new(ScalarExpr::CallUnmaterializable(UnmaterializableFunc::Now)), + expr2: Box::new(ScalarExpr::Column(0)), + }, + Ok(( + Some(ScalarExpr::CallUnary { + func: 
UnaryFunc::StepTimestamp, + expr: Box::new(ScalarExpr::Column(0)), + }), + None, + )), + ), + // now >= col(0) + ( + ScalarExpr::CallBinary { + func: BinaryFunc::Gte, + expr1: Box::new(ScalarExpr::CallUnmaterializable(UnmaterializableFunc::Now)), + expr2: Box::new(ScalarExpr::Column(0)), + }, + Ok((Some(ScalarExpr::Column(0)), None)), + ), + ]; + for (expr, expected) in test_list.into_iter() { + let actual = expr.extract_bound(); + // EvalError is not Eq, so we need to compare the error message + match (actual, expected) { + (Ok(l), Ok(r)) => assert_eq!(l, r), + (Err(l), Err(r)) => assert!(matches!(l, r)), + (l, r) => panic!("expected: {:?}, actual: {:?}", r, l), + } + } + } +} diff --git a/src/frontend/Cargo.toml b/src/frontend/Cargo.toml index 81990c37f192..9d5578eb50be 100644 --- a/src/frontend/Cargo.toml +++ b/src/frontend/Cargo.toml @@ -9,6 +9,9 @@ default = ["python"] python = ["dep:script"] testing = [] +[lints] +workspace = true + [dependencies] api.workspace = true arc-swap = "1.0" diff --git a/src/frontend/src/instance.rs b/src/frontend/src/instance.rs index 4dcb28ae06dc..cf58b741652b 100644 --- a/src/frontend/src/instance.rs +++ b/src/frontend/src/instance.rs @@ -147,7 +147,7 @@ impl Instance { .enable_router() .enable_store() .enable_heartbeat() - .enable_ddl() + .enable_procedure() .channel_manager(channel_manager) .ddl_channel_manager(ddl_channel_manager) .build(); diff --git a/src/frontend/src/instance/builder.rs b/src/frontend/src/instance/builder.rs index 8e891f41faeb..c890eeba71fa 100644 --- a/src/frontend/src/instance/builder.rs +++ b/src/frontend/src/instance/builder.rs @@ -19,11 +19,12 @@ use catalog::kvbackend::KvBackendCatalogManager; use common_base::Plugins; use common_meta::cache_invalidator::{CacheInvalidatorRef, DummyCacheInvalidator}; use common_meta::datanode_manager::DatanodeManagerRef; -use common_meta::ddl::DdlTaskExecutorRef; +use common_meta::ddl::ProcedureExecutorRef; use common_meta::key::TableMetadataManager; use common_meta::kv_backend::KvBackendRef; use operator::delete::Deleter; use operator::insert::Inserter; +use operator::procedure::ProcedureServiceOperator; use operator::statement::StatementExecutor; use operator::table::TableMutationOperator; use partition::manager::PartitionRuleManager; @@ -35,12 +36,13 @@ use crate::instance::region_query::FrontendRegionQueryHandler; use crate::instance::{Instance, StatementExecutorRef}; use crate::script::ScriptExecutor; +/// The frontend [`Instance`] builder. 
pub struct FrontendBuilder { kv_backend: KvBackendRef, cache_invalidator: Option, datanode_manager: DatanodeManagerRef, plugins: Option, - ddl_task_executor: DdlTaskExecutorRef, + procedure_executor: ProcedureExecutorRef, heartbeat_task: Option, } @@ -48,14 +50,14 @@ impl FrontendBuilder { pub fn new( kv_backend: KvBackendRef, datanode_manager: DatanodeManagerRef, - ddl_task_executor: DdlTaskExecutorRef, + procedure_executor: ProcedureExecutorRef, ) -> Self { Self { kv_backend, cache_invalidator: None, datanode_manager, plugins: None, - ddl_task_executor, + procedure_executor, heartbeat_task: None, } } @@ -112,10 +114,15 @@ impl FrontendBuilder { deleter.clone(), )); + let procedure_service_handler = Arc::new(ProcedureServiceOperator::new( + self.procedure_executor.clone(), + )); + let query_engine = QueryEngineFactory::new_with_plugins( catalog_manager.clone(), Some(region_query_handler.clone()), Some(table_mutation_handler), + Some(procedure_service_handler), true, plugins.clone(), ) @@ -127,7 +134,7 @@ impl FrontendBuilder { let statement_executor = Arc::new(StatementExecutor::new( catalog_manager.clone(), query_engine.clone(), - self.ddl_task_executor, + self.procedure_executor, kv_backend.clone(), catalog_manager.clone(), inserter.clone(), diff --git a/src/index/Cargo.toml b/src/index/Cargo.toml index 082831b7315c..effa0a79e635 100644 --- a/src/index/Cargo.toml +++ b/src/index/Cargo.toml @@ -4,6 +4,9 @@ version.workspace = true edition.workspace = true license.workspace = true +[lints] +workspace = true + [dependencies] async-trait.workspace = true asynchronous-codec = "0.7.0" diff --git a/src/log-store/Cargo.toml b/src/log-store/Cargo.toml index 82d8c2a6ba01..e134399e4558 100644 --- a/src/log-store/Cargo.toml +++ b/src/log-store/Cargo.toml @@ -9,6 +9,9 @@ protobuf-build = { version = "0.15", default-features = false, features = [ "protobuf-codec", ] } +[lints] +workspace = true + [dependencies] async-stream.workspace = true async-trait.workspace = true diff --git a/src/meta-client/Cargo.toml b/src/meta-client/Cargo.toml index 921fe610548f..2ba3b784b71d 100644 --- a/src/meta-client/Cargo.toml +++ b/src/meta-client/Cargo.toml @@ -4,6 +4,9 @@ version.workspace = true edition.workspace = true license.workspace = true +[lints] +workspace = true + [dependencies] api.workspace = true async-trait = "0.1" diff --git a/src/meta-client/src/client.rs b/src/meta-client/src/client.rs index 4620b92bea4a..94122b81a7c6 100644 --- a/src/meta-client/src/client.rs +++ b/src/meta-client/src/client.rs @@ -23,10 +23,13 @@ mod store; use api::v1::meta::Role; use common_error::ext::BoxedError; use common_grpc::channel_manager::{ChannelConfig, ChannelManager}; -use common_meta::ddl::{DdlTaskExecutor, ExecutorContext}; +use common_meta::ddl::{ExecutorContext, ProcedureExecutor}; use common_meta::error::{self as meta_error, Result as MetaResult}; use common_meta::rpc::ddl::{SubmitDdlTaskRequest, SubmitDdlTaskResponse}; use common_meta::rpc::lock::{LockRequest, LockResponse, UnlockRequest}; +use common_meta::rpc::procedure::{ + MigrateRegionRequest, MigrateRegionResponse, ProcedureStateResponse, +}; use common_meta::rpc::store::{ BatchDeleteRequest, BatchDeleteResponse, BatchGetRequest, BatchGetResponse, BatchPutRequest, BatchPutResponse, CompareAndPutRequest, CompareAndPutResponse, DeleteRangeRequest, @@ -56,7 +59,7 @@ pub struct MetaClientBuilder { enable_router: bool, enable_store: bool, enable_lock: bool, - enable_ddl: bool, + enable_procedure: bool, channel_manager: Option, ddl_channel_manager: Option, 
heartbeat_channel_manager: Option, @@ -99,9 +102,9 @@ impl MetaClientBuilder { } } - pub fn enable_ddl(self) -> Self { + pub fn enable_procedure(self) -> Self { Self { - enable_ddl: true, + enable_procedure: true, ..self } } @@ -155,9 +158,9 @@ impl MetaClientBuilder { if self.enable_lock { client.lock = Some(LockClient::new(self.id, self.role, mgr.clone())); } - if self.enable_ddl { + if self.enable_procedure { let mgr = self.ddl_channel_manager.unwrap_or(mgr); - client.ddl = Some(ProcedureClient::new( + client.procedure = Some(ProcedureClient::new( self.id, self.role, mgr, @@ -176,11 +179,11 @@ pub struct MetaClient { heartbeat: Option, store: Option, lock: Option, - ddl: Option, + procedure: Option, } #[async_trait::async_trait] -impl DdlTaskExecutor for MetaClient { +impl ProcedureExecutor for MetaClient { async fn submit_ddl_task( &self, _ctx: &ExecutorContext, @@ -191,6 +194,28 @@ impl DdlTaskExecutor for MetaClient { .map_err(BoxedError::new) .context(meta_error::ExternalSnafu) } + + async fn migrate_region( + &self, + _ctx: &ExecutorContext, + request: MigrateRegionRequest, + ) -> MetaResult { + self.migrate_region(request) + .await + .map_err(BoxedError::new) + .context(meta_error::ExternalSnafu) + } + + async fn query_procedure_state( + &self, + _ctx: &ExecutorContext, + pid: &str, + ) -> MetaResult { + self.query_procedure_state(pid) + .await + .map_err(BoxedError::new) + .context(meta_error::ExternalSnafu) + } } impl MetaClient { @@ -228,7 +253,7 @@ impl MetaClient { client.start(urls.clone()).await?; info!("Lock client started"); } - if let Some(client) = &mut self.ddl { + if let Some(client) = &mut self.procedure { client.start(urls).await?; info!("DDL client started"); } @@ -328,13 +353,33 @@ impl MetaClient { Ok(()) } + /// Query the procedure state by its id. + pub async fn query_procedure_state(&self, pid: &str) -> Result { + self.procedure_client()?.query_procedure_state(pid).await + } + + /// Submit a region migration task. + pub async fn migrate_region( + &self, + request: MigrateRegionRequest, + ) -> Result { + self.procedure_client()? + .migrate_region( + request.region_id, + request.from_peer, + request.to_peer, + request.replay_timeout, + ) + .await + } + /// Submit a DDL task pub async fn submit_ddl_task( &self, req: SubmitDdlTaskRequest, ) -> Result { let res = self - .ddl_client()? + .procedure_client()? .submit_ddl_task(req.try_into().context(error::ConvertMetaRequestSnafu)?) .await? 
.try_into() @@ -365,8 +410,8 @@ impl MetaClient { } #[inline] - pub fn ddl_client(&self) -> Result { - self.ddl + pub fn procedure_client(&self) -> Result { + self.procedure .clone() .context(error::NotStartedSnafu { name: "ddl_client" }) } diff --git a/src/meta-srv/Cargo.toml b/src/meta-srv/Cargo.toml index 1202514ac686..176c4a1f4f37 100644 --- a/src/meta-srv/Cargo.toml +++ b/src/meta-srv/Cargo.toml @@ -7,6 +7,9 @@ license.workspace = true [features] mock = [] +[lints] +workspace = true + [dependencies] anymap = "1.0.0-beta.2" api.workspace = true diff --git a/src/meta-srv/src/metasrv.rs b/src/meta-srv/src/metasrv.rs index cddcd06885d7..2aafe519bff0 100644 --- a/src/meta-srv/src/metasrv.rs +++ b/src/meta-srv/src/metasrv.rs @@ -21,7 +21,7 @@ use std::time::Duration; use common_base::Plugins; use common_greptimedb_telemetry::GreptimeDBTelemetryTask; use common_grpc::channel_manager; -use common_meta::ddl::DdlTaskExecutorRef; +use common_meta::ddl::ProcedureExecutorRef; use common_meta::key::TableMetadataManagerRef; use common_meta::kv_backend::{KvBackendRef, ResettableKvBackend, ResettableKvBackendRef}; use common_meta::peer::Peer; @@ -253,7 +253,7 @@ pub struct MetaSrv { lock: DistLockRef, procedure_manager: ProcedureManagerRef, mailbox: MailboxRef, - ddl_executor: DdlTaskExecutorRef, + procedure_executor: ProcedureExecutorRef, wal_options_allocator: WalOptionsAllocatorRef, table_metadata_manager: TableMetadataManagerRef, memory_region_keeper: MemoryRegionKeeperRef, @@ -423,8 +423,8 @@ impl MetaSrv { &self.mailbox } - pub fn ddl_executor(&self) -> &DdlTaskExecutorRef { - &self.ddl_executor + pub fn procedure_executor(&self) -> &ProcedureExecutorRef { + &self.procedure_executor } pub fn procedure_manager(&self) -> &ProcedureManagerRef { diff --git a/src/meta-srv/src/metasrv/builder.rs b/src/meta-srv/src/metasrv/builder.rs index 0a38bc3f37be..dc007a81fe01 100644 --- a/src/meta-srv/src/metasrv/builder.rs +++ b/src/meta-srv/src/metasrv/builder.rs @@ -329,7 +329,7 @@ impl MetaSrvBuilder { lock, procedure_manager, mailbox, - ddl_executor: ddl_manager, + procedure_executor: ddl_manager, wal_options_allocator, table_metadata_manager, greptimedb_telemetry_task: get_greptimedb_telemetry_task( diff --git a/src/meta-srv/src/service/procedure.rs b/src/meta-srv/src/service/procedure.rs index 8181ee5e4214..a45e538a3693 100644 --- a/src/meta-srv/src/service/procedure.rs +++ b/src/meta-srv/src/service/procedure.rs @@ -66,7 +66,7 @@ impl procedure_service_server::ProcedureService for MetaSrv { .context(error::ConvertProtoDataSnafu)?; let resp = self - .ddl_executor() + .procedure_executor() .submit_ddl_task( &ExecutorContext { cluster_id: Some(cluster_id), diff --git a/src/metric-engine/Cargo.toml b/src/metric-engine/Cargo.toml index 4722fa81e2fc..13aa59fe8b30 100644 --- a/src/metric-engine/Cargo.toml +++ b/src/metric-engine/Cargo.toml @@ -4,6 +4,9 @@ version.workspace = true edition.workspace = true license.workspace = true +[lints] +workspace = true + [dependencies] api.workspace = true aquamarine.workspace = true diff --git a/src/mito2/Cargo.toml b/src/mito2/Cargo.toml index d9aecbdd7f52..5ebe42a9da5e 100644 --- a/src/mito2/Cargo.toml +++ b/src/mito2/Cargo.toml @@ -8,6 +8,9 @@ license.workspace = true default = [] test = ["common-test-util", "log-store"] +[lints] +workspace = true + [dependencies] anymap = "1.0.0-beta.2" api.workspace = true diff --git a/src/mito2/src/error.rs b/src/mito2/src/error.rs index f141857a5322..3885e3ae8506 100644 --- a/src/mito2/src/error.rs +++ b/src/mito2/src/error.rs @@ 
-566,6 +566,12 @@ pub enum Error { error: parquet::errors::ParquetError, location: Location, }, + + #[snafu(display("Failed to iter data part"))] + ReadDataPart { + #[snafu(source)] + error: parquet::errors::ParquetError, + }, } pub type Result = std::result::Result; @@ -669,7 +675,7 @@ impl ErrorExt for Error { FilterRecordBatch { source, .. } => source.status_code(), Upload { .. } => StatusCode::StorageUnavailable, BiError { .. } => StatusCode::Internal, - EncodeMemtable { .. } => StatusCode::Internal, + EncodeMemtable { .. } | ReadDataPart { .. } => StatusCode::Internal, } } diff --git a/src/mito2/src/memtable/merge_tree.rs b/src/mito2/src/memtable/merge_tree.rs index 6e7c0329b418..8a0a6031a0bf 100644 --- a/src/mito2/src/memtable/merge_tree.rs +++ b/src/mito2/src/memtable/merge_tree.rs @@ -94,7 +94,8 @@ impl Memtable for MergeTreeMemtable { // TODO(yingwen): Validate schema while inserting rows. let mut metrics = WriteMetrics::default(); - let res = self.tree.write(kvs, &mut metrics); + let mut pk_buffer = Vec::new(); + let res = self.tree.write(kvs, &mut pk_buffer, &mut metrics); self.update_stats(&metrics); diff --git a/src/mito2/src/memtable/merge_tree/data.rs b/src/mito2/src/memtable/merge_tree/data.rs index e4ed65f8f601..20224b8af23c 100644 --- a/src/mito2/src/memtable/merge_tree/data.rs +++ b/src/mito2/src/memtable/merge_tree/data.rs @@ -15,6 +15,7 @@ //! Data part of a shard. use std::cmp::{Ordering, Reverse}; +use std::fmt::{Debug, Formatter}; use std::ops::Range; use std::sync::Arc; @@ -31,6 +32,7 @@ use datatypes::vectors::{ TimestampSecondVector, UInt16Vector, UInt16VectorBuilder, UInt64Vector, UInt64VectorBuilder, UInt8VectorBuilder, }; +use parquet::arrow::arrow_reader::{ParquetRecordBatchReader, ParquetRecordBatchReaderBuilder}; use parquet::arrow::ArrowWriter; use parquet::file::properties::WriterProperties; use snafu::ResultExt; @@ -140,13 +142,13 @@ impl DataBuffer { /// `freeze` clears the buffers of builders. pub fn freeze(&mut self, pk_weights: &[u16]) -> Result { let encoder = DataPartEncoder::new(&self.metadata, pk_weights, None); - let encoded = encoder.write(self)?; - Ok(DataPart::Parquet(encoded)) + let parts = encoder.write(self)?; + Ok(parts) } /// Reads batches from data buffer without resetting builder's buffers. - pub fn iter(&mut self, pk_weights: &[u16]) -> Result { - // todo(hl): control whether to dedup while invoking `iter`. + pub fn read(&mut self, pk_weights: &[u16]) -> Result { + // todo(hl): control whether to dedup while invoking `read`. let batch = data_buffer_to_record_batches( self.data_part_schema.clone(), self, @@ -155,7 +157,7 @@ impl DataBuffer { true, true, )?; - DataBufferIter::new(batch) + DataBufferReader::new(batch) } /// Returns num of rows in data buffer. @@ -287,21 +289,21 @@ fn data_buffer_to_record_batches( } #[derive(Debug)] -pub(crate) struct DataBufferIter { +pub(crate) struct DataBufferReader { batch: RecordBatch, offset: usize, current_batch: Option<(PkIndex, Range)>, } -impl DataBufferIter { +impl DataBufferReader { pub(crate) fn new(batch: RecordBatch) -> Result { - let mut iter = Self { + let mut reader = Self { batch, offset: 0, current_batch: None, }; - iter.next()?; // fill data batch for comparison and merge. - Ok(iter) + reader.next()?; // fill data batch for comparison and merge. + Ok(reader) } pub(crate) fn is_valid(&self) -> bool { @@ -309,7 +311,7 @@ impl DataBufferIter { } /// # Panics - /// If Current iterator is not exhausted. + /// If Current reader is exhausted. 
pub(crate) fn current_data_batch(&self) -> DataBatch { let (pk_index, range) = self.current_batch.as_ref().unwrap(); DataBatch { @@ -320,13 +322,13 @@ impl DataBufferIter { } /// # Panics - /// If Current iterator is exhausted. + /// If Current reader is exhausted. pub(crate) fn current_pk_index(&self) -> PkIndex { let (pk_index, _) = self.current_batch.as_ref().unwrap(); *pk_index } - /// Advances iterator to next data batch. + /// Advances reader to next data batch. pub(crate) fn next(&mut self) -> Result<()> { if self.offset >= self.batch.num_rows() { self.current_batch = None; @@ -506,7 +508,7 @@ impl<'a> DataPartEncoder<'a> { .build() }) } - pub fn write(&self, source: &mut DataBuffer) -> Result { + pub fn write(&self, source: &mut DataBuffer) -> Result { let mut bytes = Vec::with_capacity(1024); let mut writer = ArrowWriter::try_new(&mut bytes, self.schema.clone(), self.writer_props()) .context(error::EncodeMemtableSnafu)?; @@ -519,26 +521,138 @@ impl<'a> DataPartEncoder<'a> { true, )?; writer.write(&rb).context(error::EncodeMemtableSnafu)?; - let _file_meta = writer.close().context(error::EncodeMemtableSnafu)?; - Ok(Bytes::from(bytes)) + let _metadata = writer.close().context(error::EncodeMemtableSnafu)?; + Ok(DataPart::Parquet(ParquetPart { + data: Bytes::from(bytes), + })) } } +/// Data parts under a shard. +pub struct DataParts { + /// The active writing buffer. + pub(crate) active: DataBuffer, + /// immutable (encoded) parts. + pub(crate) frozen: Vec, +} + /// Format of immutable data part. pub enum DataPart { - Parquet(Bytes), + Parquet(ParquetPart), } impl DataPart { fn is_empty(&self) -> bool { match self { - DataPart::Parquet(data) => data.is_empty(), + DataPart::Parquet(p) => p.data.is_empty(), + } + } + + /// Reads frozen data part and yields [DataBatch]es. + pub fn read(&self) -> Result { + match self { + DataPart::Parquet(data_bytes) => DataPartReader::new(data_bytes.data.clone(), None), } } } -/// Data parts under a shard. -pub struct DataParts {} +pub struct DataPartReader { + inner: ParquetRecordBatchReader, + current_range: Range, + current_pk_index: Option, + current_batch: Option, +} + +impl Debug for DataPartReader { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.debug_struct("DataPartReader") + .field("current_range", &self.current_range) + .field("current_pk_index", &self.current_pk_index) + .finish() + } +} + +impl DataPartReader { + pub fn new(data: Bytes, batch_size: Option) -> Result { + let mut builder = + ParquetRecordBatchReaderBuilder::try_new(data).context(error::ReadDataPartSnafu)?; + if let Some(batch_size) = batch_size { + builder = builder.with_batch_size(batch_size); + } + let parquet_reader = builder.build().context(error::ReadDataPartSnafu)?; + let mut reader = Self { + inner: parquet_reader, + current_pk_index: None, + current_range: 0..0, + current_batch: None, + }; + reader.next()?; + Ok(reader) + } + + /// Returns false if current reader is exhausted. + pub(crate) fn is_valid(&self) -> bool { + self.current_pk_index.is_some() + } + + /// Returns current pk index. + /// + /// # Panics + /// If reader is exhausted. + pub(crate) fn current_pk_index(&self) -> PkIndex { + self.current_pk_index.expect("DataPartReader is exhausted") + } + + /// Returns current data batch of reader. + /// # Panics + /// If reader is exhausted. 
+ pub(crate) fn current_data_batch(&self) -> DataBatch { + let rb = self.current_batch.as_ref().unwrap(); + let pk_index = self.current_pk_index.unwrap(); + let range = self.current_range.clone(); + DataBatch { + pk_index, + rb, + range, + } + } + + pub(crate) fn next(&mut self) -> Result<()> { + if let Some((next_pk, range)) = self.search_next_pk_range() { + // first try to search next pk in current record batch. + self.current_pk_index = Some(next_pk); + self.current_range = range; + } else { + // current record batch reaches eof, fetch next record batch from parquet reader. + if let Some(rb) = self.inner.next() { + let rb = rb.context(error::ComputeArrowSnafu)?; + self.current_range = 0..0; + self.current_batch = Some(rb); + return self.next(); + } else { + // parquet is also exhausted + self.current_pk_index = None; + self.current_batch = None; + } + } + + Ok(()) + } + + /// Searches next primary key along with it's offset range inside record batch. + fn search_next_pk_range(&self) -> Option<(PkIndex, Range)> { + self.current_batch.as_ref().and_then(|b| { + // safety: PK_INDEX_COLUMN_NAME must present in record batch yielded by data part. + let pk_array = pk_index_array(b); + search_next_pk_range(pk_array, self.current_range.end) + }) + } +} + +/// Parquet-encoded `DataPart`. +pub struct ParquetPart { + data: Bytes, +} #[cfg(test)] mod tests { @@ -778,7 +892,10 @@ mod tests { assert_eq!(4, buffer.num_rows()); let encoder = DataPartEncoder::new(&meta, &[0, 1, 2], None); - let encoded = encoder.write(&mut buffer).unwrap(); + let encoded = match encoder.write(&mut buffer).unwrap() { + DataPart::Parquet(data) => data.data, + }; + let s = String::from_utf8_lossy(encoded.as_bytes()); assert!(s.starts_with("PAR1")); assert!(s.ends_with("PAR1")); @@ -789,10 +906,10 @@ mod tests { assert_eq!(3, batch.num_rows()); } - fn check_buffer_values_equal(iter: &mut DataBufferIter, expected_values: &[Vec]) { + fn check_buffer_values_equal(reader: &mut DataBufferReader, expected_values: &[Vec]) { let mut output = Vec::with_capacity(expected_values.len()); - while iter.is_valid() { - let batch = iter.current_data_batch().slice_record_batch(); + while reader.is_valid() { + let batch = reader.current_data_batch().slice_record_batch(); let values = batch .column_by_name("v1") .unwrap() @@ -803,7 +920,7 @@ mod tests { .map(|v| v.unwrap()) .collect::>(); output.push(values); - iter.next().unwrap(); + reader.next().unwrap(); } assert_eq!(expected_values, output); } @@ -842,7 +959,7 @@ mod tests { 2, ); - let mut iter = buffer.iter(&[0, 1, 3, 2]).unwrap(); + let mut iter = buffer.read(&[0, 1, 3, 2]).unwrap(); check_buffer_values_equal(&mut iter, &[vec![1.1, 2.1, 3.1], vec![1.0, 2.0, 3.0]]); } @@ -850,7 +967,77 @@ mod tests { fn test_iter_empty_data_buffer() { let meta = metadata_for_test(); let mut buffer = DataBuffer::with_capacity(meta.clone(), 10); - let mut iter = buffer.iter(&[0, 1, 3, 2]).unwrap(); + let mut iter = buffer.read(&[0, 1, 3, 2]).unwrap(); check_buffer_values_equal(&mut iter, &[]); } + + fn check_part_values_equal(iter: &mut DataPartReader, expected_values: &[Vec]) { + let mut output = Vec::with_capacity(expected_values.len()); + while iter.is_valid() { + let batch = iter.current_data_batch().slice_record_batch(); + let values = batch + .column_by_name("v1") + .unwrap() + .as_any() + .downcast_ref::() + .unwrap() + .iter() + .map(|v| v.unwrap()) + .collect::>(); + output.push(values); + iter.next().unwrap(); + } + assert_eq!(expected_values, output); + } + + fn check_iter_data_part(weights: 
&[u16], expected_values: &[Vec]) { + let meta = metadata_for_test(); + let mut buffer = DataBuffer::with_capacity(meta.clone(), 10); + + write_rows_to_buffer( + &mut buffer, + &meta, + 2, + vec![0, 1, 2], + vec![Some(1.0), Some(2.0), Some(3.0)], + 2, + ); + + write_rows_to_buffer( + &mut buffer, + &meta, + 3, + vec![1, 2, 3], + vec![Some(1.1), Some(2.1), Some(3.1)], + 3, + ); + + write_rows_to_buffer( + &mut buffer, + &meta, + 2, + vec![2, 3], + vec![Some(2.2), Some(2.3)], + 4, + ); + + let encoder = DataPartEncoder::new(&meta, weights, Some(4)); + let encoded = encoder.write(&mut buffer).unwrap(); + + let mut iter = encoded.read().unwrap(); + check_part_values_equal(&mut iter, expected_values); + } + + #[test] + fn test_iter_data_part() { + check_iter_data_part( + &[0, 1, 2, 3], + &[vec![1.0, 2.0, 3.0, 2.3], vec![1.1, 2.1, 3.1]], + ); + + check_iter_data_part( + &[3, 2, 1, 0], + &[vec![1.1, 2.1, 3.1], vec![1.0, 2.0, 3.0, 2.3]], + ); + } } diff --git a/src/mito2/src/memtable/merge_tree/partition.rs b/src/mito2/src/memtable/merge_tree/partition.rs index 0a5921c0ca5c..69c92ff69f3a 100644 --- a/src/mito2/src/memtable/merge_tree/partition.rs +++ b/src/mito2/src/memtable/merge_tree/partition.rs @@ -16,11 +16,20 @@ //! //! We only support partitioning the tree by pre-defined internal columns. +use std::collections::HashSet; use std::sync::{Arc, RwLock}; +use common_recordbatch::filter::SimpleFilterEvaluator; +use store_api::metadata::RegionMetadataRef; +use store_api::metric_engine_consts::DATA_SCHEMA_TABLE_ID_COLUMN_NAME; +use store_api::storage::ColumnId; + +use crate::error::Result; +use crate::memtable::key_values::KeyValue; +use crate::memtable::merge_tree::metrics::WriteMetrics; use crate::memtable::merge_tree::shard::Shard; use crate::memtable::merge_tree::shard_builder::ShardBuilder; -use crate::memtable::merge_tree::ShardId; +use crate::memtable::merge_tree::{MergeTreeConfig, PkId, ShardId}; /// Key of a partition. pub type PartitionKey = u32; @@ -30,13 +39,146 @@ pub struct Partition { inner: RwLock, } +impl Partition { + /// Creates a new partition. + pub fn new(_metadata: RegionMetadataRef, _config: &MergeTreeConfig) -> Self { + unimplemented!() + } + + /// Writes to the partition with a primary key. + pub fn write_with_key( + &self, + primary_key: &[u8], + key_value: KeyValue, + metrics: &mut WriteMetrics, + ) -> Result<()> { + let mut inner = self.inner.write().unwrap(); + // Now we ensure one key only exists in one shard. + if let Some(pk_id) = inner.find_key_in_shards(primary_key) { + // Key already in shards. + return inner.write_to_shard(pk_id, key_value); + } + + if inner.shard_builder.should_freeze() { + let shard_id = inner.active_shard_id; + let shard = inner.shard_builder.finish(shard_id)?; + inner.active_shard_id += 1; + inner.shards.push(shard); + } + + // Write to the shard builder. + inner + .shard_builder + .write_with_key(primary_key, key_value, metrics)?; + + Ok(()) + } + + /// Writes to the partition without a primary key. + pub fn write_no_key(&self, key_value: KeyValue, metrics: &mut WriteMetrics) -> Result<()> { + let mut inner = self.inner.write().unwrap(); + // If no primary key, always write to the first shard. + if inner.shards.is_empty() { + let shard_id = inner.active_shard_id; + inner.shards.push(Shard::new_no_dict(shard_id)); + inner.active_shard_id += 1; + } + + // A dummy pk id. 
+ let pk_id = PkId { + shard_id: inner.active_shard_id - 1, + pk_index: 0, + }; + inner.shards[0].write_key_value(pk_id, key_value, metrics) + } + + /// Scans data in the partition. + pub fn scan( + &self, + _projection: HashSet, + _filters: Vec, + ) -> Result { + unimplemented!() + } + + /// Freezes the partition. + pub fn freeze(&self) -> Result<()> { + unimplemented!() + } + + /// Forks the partition. + pub fn fork(&self, _metadata: &RegionMetadataRef) -> Partition { + unimplemented!() + } + + /// Returns true if the partition has data. + pub fn has_data(&self) -> bool { + unimplemented!() + } + + /// Returns shared memory size of the partition. + pub fn shared_memory_size(&self) -> usize { + unimplemented!() + } + + /// Get partition key from the key value. + pub(crate) fn get_partition_key(key_value: &KeyValue, is_partitioned: bool) -> PartitionKey { + if !is_partitioned { + return PartitionKey::default(); + } + + let Some(value) = key_value.primary_keys().next() else { + return PartitionKey::default(); + }; + + value.as_u32().unwrap().unwrap() + } + + /// Returns true if the region can be partitioned. + pub(crate) fn has_multi_partitions(metadata: &RegionMetadataRef) -> bool { + metadata + .primary_key_columns() + .next() + .map(|meta| meta.column_schema.name == DATA_SCHEMA_TABLE_ID_COLUMN_NAME) + .unwrap_or(false) + } + + /// Returns true if this is a partition column. + pub(crate) fn is_partition_column(name: &str) -> bool { + name == DATA_SCHEMA_TABLE_ID_COLUMN_NAME + } +} + +/// Reader to scan rows in a partition. +/// +/// It can merge rows from multiple shards. +pub struct PartitionReader {} + pub type PartitionRef = Arc; /// Inner struct of the partition. +/// +/// A key only exists in one shard. struct Inner { /// Shard whose dictionary is active. shard_builder: ShardBuilder, - next_shard_id: ShardId, + active_shard_id: ShardId, /// Shards with frozon dictionary. shards: Vec, } + +impl Inner { + fn find_key_in_shards(&self, primary_key: &[u8]) -> Option { + for shard in &self.shards { + if let Some(pkid) = shard.find_key(primary_key) { + return Some(pkid); + } + } + + None + } + + fn write_to_shard(&mut self, _pk_id: PkId, _key_value: KeyValue) -> Result<()> { + unimplemented!() + } +} diff --git a/src/mito2/src/memtable/merge_tree/shard.rs b/src/mito2/src/memtable/merge_tree/shard.rs index d7fb74b6bafb..9eceb4920130 100644 --- a/src/mito2/src/memtable/merge_tree/shard.rs +++ b/src/mito2/src/memtable/merge_tree/shard.rs @@ -14,9 +14,17 @@ //! Shard in a partition. +use std::collections::HashSet; + +use common_recordbatch::filter::SimpleFilterEvaluator; +use store_api::storage::ColumnId; + +use crate::error::Result; +use crate::memtable::key_values::KeyValue; use crate::memtable::merge_tree::data::DataParts; use crate::memtable::merge_tree::dict::KeyDictRef; -use crate::memtable::merge_tree::ShardId; +use crate::memtable::merge_tree::metrics::WriteMetrics; +use crate::memtable::merge_tree::{PkId, ShardId}; /// Shard stores data related to the same key dictionary. pub struct Shard { @@ -26,3 +34,37 @@ pub struct Shard { /// Data in the shard. data_parts: DataParts, } + +impl Shard { + /// Returns a shard without dictionary. + pub fn new_no_dict(_shard_id: ShardId) -> Shard { + unimplemented!() + } + + /// Returns the pk id of the key if it exists. + pub fn find_key(&self, _key: &[u8]) -> Option { + unimplemented!() + } + + /// Writes a key value into the shard. 
+ pub fn write_key_value( + &mut self, + _pk_id: PkId, + _key_value: KeyValue, + _metrics: &mut WriteMetrics, + ) -> Result<()> { + unimplemented!() + } + + /// Scans the shard. + pub fn scan( + &self, + _projection: &HashSet, + _filters: &[SimpleFilterEvaluator], + ) -> ShardReader { + unimplemented!() + } +} + +/// Reader to read rows in a shard. +pub struct ShardReader {} diff --git a/src/mito2/src/memtable/merge_tree/shard_builder.rs b/src/mito2/src/memtable/merge_tree/shard_builder.rs index a66366204989..c8d78029043c 100644 --- a/src/mito2/src/memtable/merge_tree/shard_builder.rs +++ b/src/mito2/src/memtable/merge_tree/shard_builder.rs @@ -14,8 +14,13 @@ //! Builder of a shard. +use crate::error::Result; +use crate::memtable::key_values::KeyValue; use crate::memtable::merge_tree::data::DataBuffer; use crate::memtable::merge_tree::dict::KeyDictBuilder; +use crate::memtable::merge_tree::metrics::WriteMetrics; +use crate::memtable::merge_tree::shard::Shard; +use crate::memtable::merge_tree::ShardId; /// Builder to write keys and data to a shard that the key dictionary /// is still active. @@ -24,4 +29,43 @@ pub struct ShardBuilder { dict_builder: KeyDictBuilder, /// Buffer to store data. data_buffer: DataBuffer, + /// Max keys in an index shard. + index_max_keys_per_shard: usize, + /// Number of rows to freeze a data part. + data_freeze_threshold: usize, } + +impl ShardBuilder { + /// Write a key value with its encoded primary key. + pub fn write_with_key( + &mut self, + _key: &[u8], + _key_value: KeyValue, + _metrics: &mut WriteMetrics, + ) -> Result<()> { + unimplemented!() + } + + /// Returns true if the builder is empty. + pub fn is_empty(&self) -> bool { + unimplemented!() + } + + /// Returns true if the builder need to freeze. + pub fn should_freeze(&self) -> bool { + unimplemented!() + } + + /// Builds a new shard and resets the builder. + pub fn finish(&mut self, _shard_id: ShardId) -> Result { + unimplemented!() + } + + /// Scans the shard builder + pub fn scan(&mut self, _shard_id: ShardId) -> Result { + unimplemented!() + } +} + +/// Reader to scan a shard. builder. +pub struct ShardBuilderReader {} diff --git a/src/mito2/src/memtable/merge_tree/tree.rs b/src/mito2/src/memtable/merge_tree/tree.rs index 39b6fbea9887..d9c26611f362 100644 --- a/src/mito2/src/memtable/merge_tree/tree.rs +++ b/src/mito2/src/memtable/merge_tree/tree.rs @@ -14,19 +14,31 @@ //! Implementation of the merge tree. 
-use std::collections::BTreeMap; +use std::collections::{BTreeMap, HashSet, VecDeque}; use std::sync::{Arc, RwLock}; +use api::v1::OpType; +use common_recordbatch::filter::SimpleFilterEvaluator; +use common_time::Timestamp; +use datafusion_common::ScalarValue; +use datatypes::arrow; +use datatypes::data_type::ConcreteDataType; +use snafu::ensure; use store_api::metadata::RegionMetadataRef; use store_api::storage::ColumnId; use table::predicate::Predicate; -use crate::error::Result; +use crate::error::{PrimaryKeyLengthMismatchSnafu, Result}; +use crate::memtable::key_values::KeyValue; use crate::memtable::merge_tree::metrics::WriteMetrics; -use crate::memtable::merge_tree::partition::{PartitionKey, PartitionRef}; +use crate::memtable::merge_tree::partition::{ + Partition, PartitionKey, PartitionReader, PartitionRef, +}; use crate::memtable::merge_tree::MergeTreeConfig; +use crate::memtable::time_series::primary_key_schema; use crate::memtable::{BoxedBatchIterator, KeyValues}; -use crate::row_converter::{McmpRowCodec, SortField}; +use crate::read::Batch; +use crate::row_converter::{McmpRowCodec, RowCodec, SortField}; /// The merge tree. pub struct MergeTree { @@ -38,6 +50,8 @@ pub struct MergeTree { row_codec: Arc, /// Partitions in the tree. partitions: RwLock>, + /// Whether the tree has multiple partitions. + is_partitioned: bool, } impl MergeTree { @@ -49,12 +63,14 @@ impl MergeTree { .map(|c| SortField::new(c.column_schema.data_type.clone())) .collect(), ); + let is_partitioned = Partition::has_multi_partitions(&metadata); MergeTree { config: config.clone(), metadata, row_codec: Arc::new(row_codec), partitions: Default::default(), + is_partitioned, } } @@ -63,39 +79,219 @@ impl MergeTree { /// /// # Panics /// Panics if the tree is immutable (frozen). - pub fn write(&self, _kvs: &KeyValues, _metrics: &mut WriteMetrics) -> Result<()> { - todo!() + pub fn write( + &self, + kvs: &KeyValues, + pk_buffer: &mut Vec, + metrics: &mut WriteMetrics, + ) -> Result<()> { + let has_pk = !self.metadata.primary_key.is_empty(); + + for kv in kvs.iter() { + ensure!( + kv.num_primary_keys() == self.row_codec.num_fields(), + PrimaryKeyLengthMismatchSnafu { + expect: self.row_codec.num_fields(), + actual: kv.num_primary_keys(), + } + ); + // Safety: timestamp of kv must be both present and a valid timestamp value. + let ts = kv.timestamp().as_timestamp().unwrap().unwrap().value(); + metrics.min_ts = metrics.min_ts.min(ts); + metrics.max_ts = metrics.max_ts.max(ts); + metrics.value_bytes += kv.fields().map(|v| v.data_size()).sum::(); + + if !has_pk { + // No primary key. + self.write_no_key(kv, metrics)?; + continue; + } + + // Encode primary key. + pk_buffer.clear(); + self.row_codec.encode_to_vec(kv.primary_keys(), pk_buffer)?; + + // Write rows with primary keys. + self.write_with_key(pk_buffer, kv, metrics)?; + } + + metrics.value_bytes += + kvs.num_rows() * (std::mem::size_of::() + std::mem::size_of::()); + + Ok(()) } /// Scans the tree. pub fn scan( &self, - _projection: Option<&[ColumnId]>, - _predicate: Option, + projection: Option<&[ColumnId]>, + predicate: Option, ) -> Result { - todo!() + // Creates the projection set. 
+ let projection: HashSet<_> = if let Some(projection) = projection { + projection.iter().copied().collect() + } else { + self.metadata.field_columns().map(|c| c.column_id).collect() + }; + + let filters = predicate + .map(|p| { + p.exprs() + .iter() + .filter_map(|f| SimpleFilterEvaluator::try_new(f.df_expr())) + .collect::>() + }) + .unwrap_or_default(); + + let partitions = self.prune_partitions(&filters); + let pk_schema = primary_key_schema(&self.metadata); + let pk_datatypes = self + .metadata + .primary_key_columns() + .map(|pk| pk.column_schema.data_type.clone()) + .collect(); + + let iter = TreeIter { + metadata: self.metadata.clone(), + pk_schema, + pk_datatypes, + projection, + filters, + row_codec: self.row_codec.clone(), + partitions, + current_reader: None, + }; + + Ok(Box::new(iter)) } /// Returns true if the tree is empty. + /// + /// A tree is empty if no partition has data. pub fn is_empty(&self) -> bool { - todo!() + let partitions = self.partitions.read().unwrap(); + partitions.values().all(|part| !part.has_data()) } /// Marks the tree as immutable. /// /// Once the tree becomes immutable, callers should not write to it again. pub fn freeze(&self) -> Result<()> { - todo!() + let partitions = self.partitions.read().unwrap(); + for partition in partitions.values() { + partition.freeze()?; + } + Ok(()) } /// Forks an immutable tree. Returns a mutable tree that inherits the index /// of this tree. - pub fn fork(&self, _metadata: RegionMetadataRef) -> MergeTree { - todo!() + pub fn fork(&self, metadata: RegionMetadataRef) -> MergeTree { + if self.metadata.schema_version != metadata.schema_version + || self.metadata.column_metadatas != metadata.column_metadatas + { + // The schema has changed, we can't reuse the tree. + return MergeTree::new(metadata, &self.config); + } + + let mut forked = BTreeMap::new(); + let partitions = self.partitions.read().unwrap(); + for (part_key, part) in partitions.iter() { + if !part.has_data() { + continue; + } + + // Only fork partitions that have data. + let forked_part = part.fork(&metadata); + forked.insert(*part_key, Arc::new(forked_part)); + } + + MergeTree { + config: self.config.clone(), + metadata, + row_codec: self.row_codec.clone(), + partitions: RwLock::new(forked), + is_partitioned: self.is_partitioned, + } } /// Returns the memory size shared by forked trees. 
pub fn shared_memory_size(&self) -> usize { - todo!() + let partitions = self.partitions.read().unwrap(); + partitions + .values() + .map(|part| part.shared_memory_size()) + .sum() + } + + fn write_with_key( + &self, + primary_key: &[u8], + key_value: KeyValue, + metrics: &mut WriteMetrics, + ) -> Result<()> { + let partition_key = Partition::get_partition_key(&key_value, self.is_partitioned); + let partition = self.get_or_create_partition(partition_key); + + partition.write_with_key(primary_key, key_value, metrics) + } + + fn write_no_key(&self, key_value: KeyValue, metrics: &mut WriteMetrics) -> Result<()> { + let partition_key = Partition::get_partition_key(&key_value, self.is_partitioned); + let partition = self.get_or_create_partition(partition_key); + + partition.write_no_key(key_value, metrics) + } + + fn get_or_create_partition(&self, partition_key: PartitionKey) -> PartitionRef { + let mut partitions = self.partitions.write().unwrap(); + partitions + .entry(partition_key) + .or_insert_with(|| Arc::new(Partition::new(self.metadata.clone(), &self.config))) + .clone() + } + + fn prune_partitions(&self, filters: &[SimpleFilterEvaluator]) -> VecDeque { + let partitions = self.partitions.read().unwrap(); + if self.is_partitioned { + // Prune partition keys. + for filter in filters { + // Only the first filter takes effect. + if Partition::is_partition_column(filter.column_name()) { + let mut pruned = VecDeque::new(); + for (key, partition) in partitions.iter() { + if filter + .evaluate_scalar(&ScalarValue::UInt32(Some(*key))) + .unwrap_or(true) + { + pruned.push_back(partition.clone()); + } + } + + return pruned; + } + } + } + + partitions.values().cloned().collect() + } +} + +struct TreeIter { + metadata: RegionMetadataRef, + pk_schema: arrow::datatypes::SchemaRef, + pk_datatypes: Vec, + projection: HashSet, + filters: Vec, + row_codec: Arc, + partitions: VecDeque, + current_reader: Option, +} + +impl Iterator for TreeIter { + type Item = Result; + + fn next(&mut self) -> Option { + unimplemented!() } } diff --git a/src/mito2/src/memtable/time_series.rs b/src/mito2/src/memtable/time_series.rs index 3c86d5cd3bb5..f2bbe2030d84 100644 --- a/src/mito2/src/memtable/time_series.rs +++ b/src/mito2/src/memtable/time_series.rs @@ -320,7 +320,9 @@ impl SeriesSet { /// Creates an arrow [SchemaRef](arrow::datatypes::SchemaRef) that only contains primary keys /// of given region schema -fn primary_key_schema(region_metadata: &RegionMetadataRef) -> arrow::datatypes::SchemaRef { +pub(crate) fn primary_key_schema( + region_metadata: &RegionMetadataRef, +) -> arrow::datatypes::SchemaRef { let fields = region_metadata .primary_key_columns() .map(|pk| { diff --git a/src/object-store/Cargo.toml b/src/object-store/Cargo.toml index 1dcd71b2d5df..c66e4dbc16fc 100644 --- a/src/object-store/Cargo.toml +++ b/src/object-store/Cargo.toml @@ -4,6 +4,9 @@ version.workspace = true edition.workspace = true license.workspace = true +[lints] +workspace = true + [dependencies] async-trait = "0.1" bytes.workspace = true diff --git a/src/operator/Cargo.toml b/src/operator/Cargo.toml index 0543604528f9..a09c505fd4b3 100644 --- a/src/operator/Cargo.toml +++ b/src/operator/Cargo.toml @@ -7,6 +7,9 @@ license.workspace = true [features] testing = [] +[lints] +workspace = true + [dependencies] api.workspace = true async-trait = "0.1" @@ -58,3 +61,4 @@ tonic.workspace = true [dev-dependencies] common-test-util.workspace = true +path-slash = "0.2" diff --git a/src/operator/src/lib.rs b/src/operator/src/lib.rs index 
53f931a87324..e672b488a9ac 100644 --- a/src/operator/src/lib.rs +++ b/src/operator/src/lib.rs @@ -17,6 +17,7 @@ pub mod error; pub mod expr_factory; pub mod insert; pub mod metrics; +pub mod procedure; pub mod region_req_factory; pub mod req_convert; pub mod statement; diff --git a/src/operator/src/procedure.rs b/src/operator/src/procedure.rs new file mode 100644 index 000000000000..d36e6b6b8579 --- /dev/null +++ b/src/operator/src/procedure.rs @@ -0,0 +1,56 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use async_trait::async_trait; +use common_error::ext::BoxedError; +use common_function::handlers::ProcedureServiceHandler; +use common_meta::ddl::{ExecutorContext, ProcedureExecutorRef}; +use common_meta::rpc::procedure::{MigrateRegionRequest, ProcedureStateResponse}; +use common_query::error as query_error; +use common_query::error::Result as QueryResult; +use snafu::ResultExt; + +/// The operator for procedures which implements [`ProcedureServiceHandler`]. +#[derive(Clone)] +pub struct ProcedureServiceOperator { + procedure_executor: ProcedureExecutorRef, +} + +impl ProcedureServiceOperator { + pub fn new(procedure_executor: ProcedureExecutorRef) -> Self { + Self { procedure_executor } + } +} + +#[async_trait] +impl ProcedureServiceHandler for ProcedureServiceOperator { + async fn migrate_region(&self, request: MigrateRegionRequest) -> QueryResult> { + Ok(self + .procedure_executor + .migrate_region(&ExecutorContext::default(), request) + .await + .map_err(BoxedError::new) + .context(query_error::ProcedureServiceSnafu)? 
+ .pid + .map(|pid| String::from_utf8_lossy(&pid.key).to_string())) + } + + async fn query_procedure_state(&self, pid: &str) -> QueryResult { + self.procedure_executor + .query_procedure_state(&ExecutorContext::default(), pid) + .await + .map_err(BoxedError::new) + .context(query_error::ProcedureServiceSnafu) + } +} diff --git a/src/operator/src/statement.rs b/src/operator/src/statement.rs index f76823df9694..b0cd2c773b08 100644 --- a/src/operator/src/statement.rs +++ b/src/operator/src/statement.rs @@ -26,7 +26,7 @@ use std::sync::Arc; use catalog::CatalogManagerRef; use common_error::ext::BoxedError; use common_meta::cache_invalidator::CacheInvalidatorRef; -use common_meta::ddl::DdlTaskExecutorRef; +use common_meta::ddl::ProcedureExecutorRef; use common_meta::key::{TableMetadataManager, TableMetadataManagerRef}; use common_meta::kv_backend::KvBackendRef; use common_meta::table_name::TableName; @@ -61,7 +61,7 @@ use crate::table::table_idents_to_full_name; pub struct StatementExecutor { catalog_manager: CatalogManagerRef, query_engine: QueryEngineRef, - ddl_executor: DdlTaskExecutorRef, + procedure_executor: ProcedureExecutorRef, table_metadata_manager: TableMetadataManagerRef, partition_manager: PartitionRuleManagerRef, cache_invalidator: CacheInvalidatorRef, @@ -72,7 +72,7 @@ impl StatementExecutor { pub fn new( catalog_manager: CatalogManagerRef, query_engine: QueryEngineRef, - ddl_task_executor: DdlTaskExecutorRef, + procedure_executor: ProcedureExecutorRef, kv_backend: KvBackendRef, cache_invalidator: CacheInvalidatorRef, inserter: InserterRef, @@ -80,7 +80,7 @@ impl StatementExecutor { Self { catalog_manager, query_engine, - ddl_executor: ddl_task_executor, + procedure_executor, table_metadata_manager: Arc::new(TableMetadataManager::new(kv_backend.clone())), partition_manager: Arc::new(PartitionRuleManager::new(kv_backend)), cache_invalidator, diff --git a/src/operator/src/statement/copy_database.rs b/src/operator/src/statement/copy_database.rs index 0ab4d09cf752..63236e3eb9f6 100644 --- a/src/operator/src/statement/copy_database.rs +++ b/src/operator/src/statement/copy_database.rs @@ -206,6 +206,7 @@ mod tests { use object_store::services::Fs; use object_store::util::normalize_dir; use object_store::ObjectStore; + use path_slash::PathExt; use table::requests::CopyDatabaseRequest; use crate::statement::copy_database::{list_files_to_copy, parse_file_name_to_copy}; @@ -223,10 +224,11 @@ mod tests { object_store.write("d", "").await.unwrap(); object_store.write("e.f.parquet", "").await.unwrap(); + let location = normalize_dir(&dir.path().to_slash().unwrap()); let request = CopyDatabaseRequest { catalog_name: "catalog_0".to_string(), schema_name: "schema_0".to_string(), - location: store_dir, + location, with: [("FORMAT".to_string(), "parquet".to_string())] .into_iter() .collect(), diff --git a/src/operator/src/statement/ddl.rs b/src/operator/src/statement/ddl.rs index 2a20f34cd667..9231ba8d5c0e 100644 --- a/src/operator/src/statement/ddl.rs +++ b/src/operator/src/statement/ddl.rs @@ -404,7 +404,7 @@ impl StatementExecutor { task: DdlTask::new_alter_table(expr.clone()), }; - self.ddl_executor + self.procedure_executor .submit_ddl_task(&ExecutorContext::default(), req) .await .context(error::ExecuteDdlSnafu)?; @@ -438,7 +438,7 @@ impl StatementExecutor { task: DdlTask::new_create_table(create_table, partitions, table_info), }; - self.ddl_executor + self.procedure_executor .submit_ddl_task(&ExecutorContext::default(), request) .await .context(error::ExecuteDdlSnafu) @@ -452,7 +452,7 @@ 
impl StatementExecutor { task: DdlTask::new_create_logical_tables(tables_data), }; - self.ddl_executor + self.procedure_executor .submit_ddl_task(&ExecutorContext::default(), request) .await .context(error::ExecuteDdlSnafu) @@ -474,7 +474,7 @@ impl StatementExecutor { ), }; - self.ddl_executor + self.procedure_executor .submit_ddl_task(&ExecutorContext::default(), request) .await .context(error::ExecuteDdlSnafu) @@ -494,7 +494,7 @@ impl StatementExecutor { ), }; - self.ddl_executor + self.procedure_executor .submit_ddl_task(&ExecutorContext::default(), request) .await .context(error::ExecuteDdlSnafu) diff --git a/src/operator/src/table.rs b/src/operator/src/table.rs index 8d53a39c7c49..38271abb87af 100644 --- a/src/operator/src/table.rs +++ b/src/operator/src/table.rs @@ -12,8 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::time::Duration; - use async_trait::async_trait; use common_error::ext::BoxedError; use common_function::handlers::{AffectedRows, TableMutationHandler}; @@ -95,15 +93,4 @@ impl TableMutationHandler for TableMutationOperator { .map_err(BoxedError::new) .context(query_error::TableMutationSnafu) } - - async fn migrate_region( - &self, - _region_id: u64, - _from_peer: u64, - _to_peer: u64, - _replay_timeout: Duration, - ) -> QueryResult { - // FIXME(dennis): implemented in the following PR. - todo!(); - } } diff --git a/src/partition/Cargo.toml b/src/partition/Cargo.toml index 4fadc02b590c..48f7c036b701 100644 --- a/src/partition/Cargo.toml +++ b/src/partition/Cargo.toml @@ -4,6 +4,9 @@ version.workspace = true edition.workspace = true license.workspace = true +[lints] +workspace = true + [dependencies] api.workspace = true async-trait = "0.1" diff --git a/src/partition/src/manager.rs b/src/partition/src/manager.rs index ab757a3d2edf..cc20e09d2a5f 100644 --- a/src/partition/src/manager.rs +++ b/src/partition/src/manager.rs @@ -74,23 +74,20 @@ impl PartitionRuleManager { Ok(route.region_routes) } - pub async fn find_region_routes_batch( + pub async fn batch_find_region_routes( &self, table_ids: &[TableId], ) -> Result>> { let table_routes = self .table_route_manager - .batch_get(table_ids) + .batch_get_physical_table_routes(table_ids) .await .context(error::TableRouteManagerSnafu)?; let mut table_region_routes = HashMap::with_capacity(table_routes.len()); for (table_id, table_route) in table_routes { - let region_routes = table_route - .region_routes() - .context(error::TableRouteManagerSnafu)? 
- .clone(); + let region_routes = table_route.region_routes; table_region_routes.insert(table_id, region_routes); } @@ -104,40 +101,25 @@ impl PartitionRuleManager { error::FindTableRoutesSnafu { table_id } ); - let mut partitions = Vec::with_capacity(region_routes.len()); - for r in region_routes { - let partition = r - .region - .partition - .clone() - .context(error::FindRegionRoutesSnafu { - region_id: r.region.id, - table_id, - })?; - let partition_def = PartitionDef::try_from(partition)?; + create_partitions_from_region_routes(table_id, region_routes) + } - partitions.push(PartitionInfo { - id: r.region.id, - partition: partition_def, - }); - } - partitions.sort_by(|a, b| { - a.partition - .partition_bounds() - .cmp(b.partition.partition_bounds()) - }); + pub async fn batch_find_table_partitions( + &self, + table_ids: &[TableId], + ) -> Result>> { + let batch_region_routes = self.batch_find_region_routes(table_ids).await?; - ensure!( - partitions - .windows(2) - .all(|w| w[0].partition.partition_columns() == w[1].partition.partition_columns()), - error::InvalidTableRouteDataSnafu { + let mut results = HashMap::with_capacity(table_ids.len()); + + for (table_id, region_routes) in batch_region_routes { + results.insert( table_id, - err_msg: "partition columns of all regions are not the same" - } - ); + create_partitions_from_region_routes(table_id, region_routes)?, + ); + } - Ok(partitions) + Ok(results) } /// Get partition rule of given table. @@ -237,6 +219,46 @@ impl PartitionRuleManager { } } +fn create_partitions_from_region_routes( + table_id: TableId, + region_routes: Vec, +) -> Result> { + let mut partitions = Vec::with_capacity(region_routes.len()); + for r in region_routes { + let partition = r + .region + .partition + .clone() + .context(error::FindRegionRoutesSnafu { + region_id: r.region.id, + table_id, + })?; + let partition_def = PartitionDef::try_from(partition)?; + + partitions.push(PartitionInfo { + id: r.region.id, + partition: partition_def, + }); + } + partitions.sort_by(|a, b| { + a.partition + .partition_bounds() + .cmp(b.partition.partition_bounds()) + }); + + ensure!( + partitions + .windows(2) + .all(|w| w[0].partition.partition_columns() == w[1].partition.partition_columns()), + error::InvalidTableRouteDataSnafu { + table_id, + err_msg: "partition columns of all regions are not the same" + } + ); + + Ok(partitions) +} + fn find_regions0(partition_rule: PartitionRuleRef, filter: &Expr) -> Result> { let expr = filter.df_expr(); match expr { diff --git a/src/plugins/Cargo.toml b/src/plugins/Cargo.toml index a11150f7f34e..c927fb3d33c5 100644 --- a/src/plugins/Cargo.toml +++ b/src/plugins/Cargo.toml @@ -4,6 +4,9 @@ version.workspace = true edition.workspace = true license.workspace = true +[lints] +workspace = true + [dependencies] auth.workspace = true common-base.workspace = true diff --git a/src/promql/Cargo.toml b/src/promql/Cargo.toml index fc8533656746..9e7bc01ae8ac 100644 --- a/src/promql/Cargo.toml +++ b/src/promql/Cargo.toml @@ -4,6 +4,9 @@ version.workspace = true edition.workspace = true license.workspace = true +[lints] +workspace = true + [dependencies] ahash.workspace = true async-recursion = "1.0" diff --git a/src/puffin/Cargo.toml b/src/puffin/Cargo.toml index 0a42084e390a..7e43c29e39e5 100644 --- a/src/puffin/Cargo.toml +++ b/src/puffin/Cargo.toml @@ -4,6 +4,9 @@ version.workspace = true edition.workspace = true license.workspace = true +[lints] +workspace = true + [dependencies] async-trait.workspace = true bitflags.workspace = true diff --git 
a/src/query/Cargo.toml b/src/query/Cargo.toml index 8976a6967ed9..7b252d69107e 100644 --- a/src/query/Cargo.toml +++ b/src/query/Cargo.toml @@ -4,6 +4,9 @@ version.workspace = true edition.workspace = true license.workspace = true +[lints] +workspace = true + [dependencies] ahash.workspace = true api.workspace = true diff --git a/src/query/src/datafusion.rs b/src/query/src/datafusion.rs index 458d2e8708e6..03945da66c9d 100644 --- a/src/query/src/datafusion.rs +++ b/src/query/src/datafusion.rs @@ -564,7 +564,7 @@ mod tests { }; catalog_manager.register_table_sync(req).unwrap(); - QueryEngineFactory::new(catalog_manager, None, None, false).query_engine() + QueryEngineFactory::new(catalog_manager, None, None, None, false).query_engine() } #[tokio::test] diff --git a/src/query/src/query_engine.rs b/src/query/src/query_engine.rs index 8a4edffc4aef..18923f3b96ad 100644 --- a/src/query/src/query_engine.rs +++ b/src/query/src/query_engine.rs @@ -24,7 +24,7 @@ use catalog::CatalogManagerRef; use common_base::Plugins; use common_function::function::FunctionRef; use common_function::function_registry::FUNCTION_REGISTRY; -use common_function::handlers::TableMutationHandlerRef; +use common_function::handlers::{ProcedureServiceHandlerRef, TableMutationHandlerRef}; use common_function::scalars::aggregate::AggregateFunctionMetaRef; use common_query::prelude::ScalarUdf; use common_query::Output; @@ -101,12 +101,14 @@ impl QueryEngineFactory { catalog_manager: CatalogManagerRef, region_query_handler: Option, table_mutation_handler: Option, + procedure_service_handler: Option, with_dist_planner: bool, ) -> Self { Self::new_with_plugins( catalog_manager, region_query_handler, table_mutation_handler, + procedure_service_handler, with_dist_planner, Default::default(), ) @@ -116,6 +118,7 @@ impl QueryEngineFactory { catalog_manager: CatalogManagerRef, region_query_handler: Option, table_mutation_handler: Option, + procedure_service_handler: Option, with_dist_planner: bool, plugins: Plugins, ) -> Self { @@ -123,6 +126,7 @@ impl QueryEngineFactory { catalog_manager, region_query_handler, table_mutation_handler, + procedure_service_handler, with_dist_planner, plugins.clone(), )); @@ -156,7 +160,7 @@ mod tests { #[test] fn test_query_engine_factory() { let catalog_list = catalog::memory::new_memory_catalog_manager().unwrap(); - let factory = QueryEngineFactory::new(catalog_list, None, None, false); + let factory = QueryEngineFactory::new(catalog_list, None, None, None, false); let engine = factory.query_engine(); diff --git a/src/query/src/query_engine/context.rs b/src/query/src/query_engine/context.rs index b90b7d41f613..f76332cde2b4 100644 --- a/src/query/src/query_engine/context.rs +++ b/src/query/src/query_engine/context.rs @@ -70,6 +70,7 @@ impl QueryEngineContext { catalog::memory::new_memory_catalog_manager().unwrap(), None, None, + None, false, Plugins::default(), )); diff --git a/src/query/src/query_engine/state.rs b/src/query/src/query_engine/state.rs index f5a6a828a420..18af09973e57 100644 --- a/src/query/src/query_engine/state.rs +++ b/src/query/src/query_engine/state.rs @@ -20,7 +20,7 @@ use async_trait::async_trait; use catalog::CatalogManagerRef; use common_base::Plugins; use common_function::function::FunctionRef; -use common_function::handlers::{MetaServiceHandlerRef, TableMutationHandlerRef}; +use common_function::handlers::{ProcedureServiceHandlerRef, TableMutationHandlerRef}; use common_function::scalars::aggregate::AggregateFunctionMetaRef; use common_function::state::FunctionState; use 
common_query::physical_plan::SessionContext; @@ -80,6 +80,7 @@ impl QueryEngineState { catalog_list: CatalogManagerRef, region_query_handler: Option, table_mutation_handler: Option, + procedure_service_handler: Option, with_dist_planner: bool, plugins: Plugins, ) -> Self { @@ -120,8 +121,7 @@ impl QueryEngineState { catalog_manager: catalog_list, function_state: Arc::new(FunctionState { table_mutation_handler, - // FIXME(dennis): implemented in the following PR. - meta_service_handler: None, + procedure_service_handler, }), aggregate_functions: Arc::new(RwLock::new(HashMap::new())), extension_rules, @@ -219,9 +219,9 @@ impl QueryEngineState { self.function_state.table_mutation_handler.as_ref() } - /// Returns the [`MetaServiceHandlerRef`] in state. - pub fn meta_service_handler(&self) -> Option<&MetaServiceHandlerRef> { - self.function_state.meta_service_handler.as_ref() + /// Returns the [`ProcedureServiceHandlerRef`] in state. + pub fn procedure_service_handler(&self) -> Option<&ProcedureServiceHandlerRef> { + self.function_state.procedure_service_handler.as_ref() } pub(crate) fn disallow_cross_catalog_query(&self) -> bool { diff --git a/src/query/src/range_select/plan_rewrite.rs b/src/query/src/range_select/plan_rewrite.rs index 36c007a4d9e6..d55fe65786fb 100644 --- a/src/query/src/range_select/plan_rewrite.rs +++ b/src/query/src/range_select/plan_rewrite.rs @@ -533,7 +533,7 @@ mod test { table, }) .is_ok()); - QueryEngineFactory::new(catalog_list, None, None, false).query_engine() + QueryEngineFactory::new(catalog_list, None, None, None, false).query_engine() } async fn do_query(sql: &str) -> Result { diff --git a/src/query/src/tests.rs b/src/query/src/tests.rs index ebf135d06e71..c2c8ace323bc 100644 --- a/src/query/src/tests.rs +++ b/src/query/src/tests.rs @@ -52,5 +52,5 @@ async fn exec_selection(engine: QueryEngineRef, sql: &str) -> Vec { pub fn new_query_engine_with_table(table: TableRef) -> QueryEngineRef { let catalog_manager = MemoryCatalogManager::new_with_table(table); - QueryEngineFactory::new(catalog_manager, None, None, false).query_engine() + QueryEngineFactory::new(catalog_manager, None, None, None, false).query_engine() } diff --git a/src/query/src/tests/query_engine_test.rs b/src/query/src/tests/query_engine_test.rs index 067747f2636a..3fcddd504322 100644 --- a/src/query/src/tests/query_engine_test.rs +++ b/src/query/src/tests/query_engine_test.rs @@ -47,7 +47,7 @@ async fn test_datafusion_query_engine() -> Result<()> { let catalog_list = catalog::memory::new_memory_catalog_manager() .map_err(BoxedError::new) .context(QueryExecutionSnafu)?; - let factory = QueryEngineFactory::new(catalog_list, None, None, false); + let factory = QueryEngineFactory::new(catalog_list, None, None, None, false); let engine = factory.query_engine(); let column_schemas = vec![ColumnSchema::new( @@ -128,7 +128,8 @@ async fn test_query_validate() -> Result<()> { disallow_cross_catalog_query: true, }); - let factory = QueryEngineFactory::new_with_plugins(catalog_list, None, None, false, plugins); + let factory = + QueryEngineFactory::new_with_plugins(catalog_list, None, None, None, false, plugins); let engine = factory.query_engine(); let stmt = @@ -158,7 +159,7 @@ async fn test_udf() -> Result<()> { common_telemetry::init_default_ut_logging(); let catalog_list = catalog_manager()?; - let factory = QueryEngineFactory::new(catalog_list, None, None, false); + let factory = QueryEngineFactory::new(catalog_list, None, None, None, false); let engine = factory.query_engine(); let pow = 
make_scalar_function(pow); diff --git a/src/query/src/tests/time_range_filter_test.rs b/src/query/src/tests/time_range_filter_test.rs index 1f5a926d79c5..c47b4e817c0a 100644 --- a/src/query/src/tests/time_range_filter_test.rs +++ b/src/query/src/tests/time_range_filter_test.rs @@ -106,7 +106,7 @@ fn create_test_engine() -> TimeRangeTester { }; let _ = catalog_manager.register_table_sync(req).unwrap(); - let engine = QueryEngineFactory::new(catalog_manager, None, None, false).query_engine(); + let engine = QueryEngineFactory::new(catalog_manager, None, None, None, false).query_engine(); TimeRangeTester { engine, filter } } diff --git a/src/script/Cargo.toml b/src/script/Cargo.toml index 825e36df99fa..81e22374861a 100644 --- a/src/script/Cargo.toml +++ b/src/script/Cargo.toml @@ -23,6 +23,9 @@ python = [ "dep:paste", ] +[lints] +workspace = true + [dependencies] api.workspace = true arc-swap = "1.0" diff --git a/src/script/benches/py_benchmark.rs b/src/script/benches/py_benchmark.rs index a4ead66f1685..6568b21a2287 100644 --- a/src/script/benches/py_benchmark.rs +++ b/src/script/benches/py_benchmark.rs @@ -52,7 +52,8 @@ where pub(crate) fn sample_script_engine() -> PyEngine { let catalog_manager = MemoryCatalogManager::new_with_table(NumbersTable::table(NUMBERS_TABLE_ID)); - let query_engine = QueryEngineFactory::new(catalog_manager, None, None, false).query_engine(); + let query_engine = + QueryEngineFactory::new(catalog_manager, None, None, None, false).query_engine(); PyEngine::new(query_engine.clone()) } diff --git a/src/script/src/python/engine.rs b/src/script/src/python/engine.rs index c2e8eccb45a2..a0991565acfd 100644 --- a/src/script/src/python/engine.rs +++ b/src/script/src/python/engine.rs @@ -385,7 +385,7 @@ mod tests { let catalog_manager = MemoryCatalogManager::new_with_table(NumbersTable::table(NUMBERS_TABLE_ID)); let query_engine = - QueryEngineFactory::new(catalog_manager, None, None, false).query_engine(); + QueryEngineFactory::new(catalog_manager, None, None, None, false).query_engine(); PyEngine::new(query_engine.clone()) } diff --git a/src/script/src/test.rs b/src/script/src/test.rs index 55ba73f582a1..b2beb799a7f9 100644 --- a/src/script/src/test.rs +++ b/src/script/src/test.rs @@ -56,7 +56,7 @@ pub async fn setup_scripts_manager( let catalog_manager = MemoryCatalogManager::new_with_table(table.clone()); - let factory = QueryEngineFactory::new(catalog_manager.clone(), None, None, false); + let factory = QueryEngineFactory::new(catalog_manager.clone(), None, None, None, false); let query_engine = factory.query_engine(); let mgr = ScriptManager::new(Arc::new(MockGrpcQueryHandler {}) as _, query_engine) .await diff --git a/src/servers/Cargo.toml b/src/servers/Cargo.toml index 521c3023b671..cfcbd1dfe2b9 100644 --- a/src/servers/Cargo.toml +++ b/src/servers/Cargo.toml @@ -10,6 +10,9 @@ mem-prof = ["dep:common-mem-prof"] pprof = ["dep:pprof"] testing = [] +[lints] +workspace = true + [dependencies] aide = { version = "0.9", features = ["axum"] } api.workspace = true diff --git a/src/servers/src/error.rs b/src/servers/src/error.rs index e4b25adcb305..4ebbdc55445c 100644 --- a/src/servers/src/error.rs +++ b/src/servers/src/error.rs @@ -596,9 +596,11 @@ macro_rules! 
define_into_tonic_status { ($Error: ty) => { impl From<$Error> for tonic::Status { fn from(err: $Error) -> Self { - use common_error::{GREPTIME_DB_HEADER_ERROR_CODE, GREPTIME_DB_HEADER_ERROR_MSG}; use tonic::codegen::http::{HeaderMap, HeaderValue}; use tonic::metadata::MetadataMap; + use $crate::http::header::constants::{ + GREPTIME_DB_HEADER_ERROR_CODE, GREPTIME_DB_HEADER_ERROR_MSG, + }; let mut headers = HeaderMap::::with_capacity(2); diff --git a/src/servers/src/http/arrow_result.rs b/src/servers/src/http/arrow_result.rs index b47912b1617e..3daad34f1d00 100644 --- a/src/servers/src/http/arrow_result.rs +++ b/src/servers/src/http/arrow_result.rs @@ -17,7 +17,7 @@ use std::sync::Arc; use arrow::datatypes::Schema; use arrow_ipc::writer::FileWriter; -use axum::http::{header, HeaderName, HeaderValue}; +use axum::http::{header, HeaderValue}; use axum::response::{IntoResponse, Response}; use common_error::status_code::StatusCode; use common_query::Output; @@ -122,15 +122,15 @@ impl IntoResponse for ArrowResponse { ( [ ( - header::CONTENT_TYPE, + &header::CONTENT_TYPE, HeaderValue::from_static("application/arrow"), ), ( - HeaderName::from_static(GREPTIME_DB_HEADER_FORMAT), + &GREPTIME_DB_HEADER_FORMAT, HeaderValue::from_static("ARROW"), ), ( - HeaderName::from_static(GREPTIME_DB_HEADER_EXECUTION_TIME), + &GREPTIME_DB_HEADER_EXECUTION_TIME, HeaderValue::from(execution_time), ), ], diff --git a/src/servers/src/http/csv_result.rs b/src/servers/src/http/csv_result.rs index 30c5d4a0264d..f0d377b01e54 100644 --- a/src/servers/src/http/csv_result.rs +++ b/src/servers/src/http/csv_result.rs @@ -101,9 +101,9 @@ impl IntoResponse for CsvResponse { ) .into_response(); resp.headers_mut() - .insert(GREPTIME_DB_HEADER_FORMAT, HeaderValue::from_static("CSV")); + .insert(&GREPTIME_DB_HEADER_FORMAT, HeaderValue::from_static("CSV")); resp.headers_mut().insert( - GREPTIME_DB_HEADER_EXECUTION_TIME, + &GREPTIME_DB_HEADER_EXECUTION_TIME, HeaderValue::from(execution_time), ); resp diff --git a/src/servers/src/http/error_result.rs b/src/servers/src/http/error_result.rs index 629594e66456..57a4bd698105 100644 --- a/src/servers/src/http/error_result.rs +++ b/src/servers/src/http/error_result.rs @@ -17,11 +17,11 @@ use axum::response::{IntoResponse, Response}; use axum::Json; use common_error::ext::ErrorExt; use common_error::status_code::StatusCode; -use common_error::{GREPTIME_DB_HEADER_ERROR_CODE, GREPTIME_DB_HEADER_ERROR_MSG}; use common_telemetry::logging::{debug, error}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; +use crate::http::header::constants::{GREPTIME_DB_HEADER_ERROR_CODE, GREPTIME_DB_HEADER_ERROR_MSG}; use crate::http::header::{GREPTIME_DB_HEADER_EXECUTION_TIME, GREPTIME_DB_HEADER_FORMAT}; use crate::http::ResponseFormat; @@ -88,9 +88,9 @@ impl IntoResponse for ErrorResponse { HeaderValue::from_str(&msg).expect("malformed error msg"), ); resp.headers_mut() - .insert(GREPTIME_DB_HEADER_FORMAT, HeaderValue::from_static(ty)); + .insert(&GREPTIME_DB_HEADER_FORMAT, HeaderValue::from_static(ty)); resp.headers_mut().insert( - GREPTIME_DB_HEADER_EXECUTION_TIME, + &GREPTIME_DB_HEADER_EXECUTION_TIME, HeaderValue::from(execution_time), ); resp diff --git a/src/servers/src/http/greptime_result_v1.rs b/src/servers/src/http/greptime_result_v1.rs index 596f1bcfdd8f..1efeeccda25c 100644 --- a/src/servers/src/http/greptime_result_v1.rs +++ b/src/servers/src/http/greptime_result_v1.rs @@ -76,15 +76,15 @@ impl IntoResponse for GreptimedbV1Response { let mut resp = Json(self).into_response(); 
resp.headers_mut().insert( - GREPTIME_DB_HEADER_FORMAT, + &GREPTIME_DB_HEADER_FORMAT, HeaderValue::from_static("greptimedb_v1"), ); resp.headers_mut().insert( - GREPTIME_DB_HEADER_EXECUTION_TIME, + &GREPTIME_DB_HEADER_EXECUTION_TIME, HeaderValue::from(execution_time), ); if let Some(m) = metrics.and_then(|m| HeaderValue::from_str(&m).ok()) { - resp.headers_mut().insert(GREPTIME_DB_HEADER_METRICS, m); + resp.headers_mut().insert(&GREPTIME_DB_HEADER_METRICS, m); } resp diff --git a/src/servers/src/http/header.rs b/src/servers/src/http/header.rs index aa0970dbdb3c..fd5dc8c43038 100644 --- a/src/servers/src/http/header.rs +++ b/src/servers/src/http/header.rs @@ -14,16 +14,45 @@ use headers::{Header, HeaderName, HeaderValue}; -pub const GREPTIME_DB_HEADER_FORMAT: &str = "x-greptime-format"; -pub const GREPTIME_DB_HEADER_EXECUTION_TIME: &str = "x-greptime-execution-time"; -pub const GREPTIME_DB_HEADER_METRICS: &str = "x-greptime-metrics"; +pub mod constants { + // New HTTP headers would better distinguish use cases among: + // * GreptimeDB + // * GreptimeCloud + // * ... + // + // And thus trying to use: + // * x-greptime-db-xxx + // * x-greptime-cloud-xxx + // + // ... accordingly + // + // Most of the headers are for GreptimeDB and thus using `x-greptime-db-` as prefix. + // Only use `x-greptime-cloud` when it's intentionally used by GreptimeCloud. + + // LEGACY HEADERS - KEEP IT UNMODIFIED + pub const GREPTIME_DB_HEADER_FORMAT: &str = "x-greptime-format"; + pub const GREPTIME_DB_HEADER_EXECUTION_TIME: &str = "x-greptime-execution-time"; + pub const GREPTIME_DB_HEADER_METRICS: &str = "x-greptime-metrics"; + pub const GREPTIME_DB_HEADER_NAME: &str = "x-greptime-db-name"; + pub const GREPTIME_TIMEZONE_HEADER_NAME: &str = "x-greptime-timezone"; + pub const GREPTIME_DB_HEADER_ERROR_CODE: &str = common_error::GREPTIME_DB_HEADER_ERROR_CODE; + pub const GREPTIME_DB_HEADER_ERROR_MSG: &str = common_error::GREPTIME_DB_HEADER_ERROR_MSG; +} + +pub static GREPTIME_DB_HEADER_FORMAT: HeaderName = + HeaderName::from_static(constants::GREPTIME_DB_HEADER_FORMAT); +pub static GREPTIME_DB_HEADER_EXECUTION_TIME: HeaderName = + HeaderName::from_static(constants::GREPTIME_DB_HEADER_EXECUTION_TIME); +pub static GREPTIME_DB_HEADER_METRICS: HeaderName = + HeaderName::from_static(constants::GREPTIME_DB_HEADER_METRICS); /// Header key of `db-name`. Example format of the header value is `greptime-public`. -pub static GREPTIME_DB_HEADER_NAME: HeaderName = HeaderName::from_static("x-greptime-db-name"); -/// Header key of query specific timezone. -/// Example format of the header value is `Asia/Shanghai` or `+08:00`. +pub static GREPTIME_DB_HEADER_NAME: HeaderName = + HeaderName::from_static(constants::GREPTIME_DB_HEADER_NAME); + +/// Header key of query specific timezone. Example format of the header value is `Asia/Shanghai` or `+08:00`. 
pub static GREPTIME_TIMEZONE_HEADER_NAME: HeaderName = - HeaderName::from_static("x-greptime-timezone"); + HeaderName::from_static(constants::GREPTIME_TIMEZONE_HEADER_NAME); pub struct GreptimeDbName(Option); diff --git a/src/servers/src/http/influxdb_result_v1.rs b/src/servers/src/http/influxdb_result_v1.rs index a4e8206058df..05525ea128a6 100644 --- a/src/servers/src/http/influxdb_result_v1.rs +++ b/src/servers/src/http/influxdb_result_v1.rs @@ -217,11 +217,11 @@ impl IntoResponse for InfluxdbV1Response { let execution_time = self.execution_time_ms; let mut resp = Json(self).into_response(); resp.headers_mut().insert( - GREPTIME_DB_HEADER_FORMAT, + &GREPTIME_DB_HEADER_FORMAT, HeaderValue::from_static("influxdb_v1"), ); resp.headers_mut().insert( - GREPTIME_DB_HEADER_EXECUTION_TIME, + &GREPTIME_DB_HEADER_EXECUTION_TIME, HeaderValue::from(execution_time), ); resp diff --git a/src/servers/src/http/prometheus_resp.rs b/src/servers/src/http/prometheus_resp.rs index 3deb0b109143..e7a310faf5b0 100644 --- a/src/servers/src/http/prometheus_resp.rs +++ b/src/servers/src/http/prometheus_resp.rs @@ -66,7 +66,7 @@ impl IntoResponse for PrometheusJsonResponse { let mut resp = Json(self).into_response(); if let Some(m) = metrics.and_then(|m| HeaderValue::from_str(&m).ok()) { - resp.headers_mut().insert(GREPTIME_DB_HEADER_METRICS, m); + resp.headers_mut().insert(&GREPTIME_DB_HEADER_METRICS, m); } resp diff --git a/src/servers/tests/http/influxdb_test.rs b/src/servers/tests/http/influxdb_test.rs index 81f17181093b..9b68802bb4c6 100644 --- a/src/servers/tests/http/influxdb_test.rs +++ b/src/servers/tests/http/influxdb_test.rs @@ -169,7 +169,7 @@ async fn test_influxdb_write() { .await; assert_eq!(result.status(), 401); assert_eq!( - result.headers().get(GREPTIME_DB_HEADER_FORMAT).unwrap(), + result.headers().get(&GREPTIME_DB_HEADER_FORMAT).unwrap(), "influxdb_v1", ); assert_eq!( @@ -185,7 +185,7 @@ async fn test_influxdb_write() { .await; assert_eq!(result.status(), 401); assert_eq!( - result.headers().get(GREPTIME_DB_HEADER_FORMAT).unwrap(), + result.headers().get(&GREPTIME_DB_HEADER_FORMAT).unwrap(), "influxdb_v1", ); assert_eq!( diff --git a/src/servers/tests/mod.rs b/src/servers/tests/mod.rs index ff60ae007fa6..284935673650 100644 --- a/src/servers/tests/mod.rs +++ b/src/servers/tests/mod.rs @@ -214,7 +214,8 @@ impl GrpcQueryHandler for DummyInstance { fn create_testing_instance(table: TableRef) -> DummyInstance { let catalog_manager = MemoryCatalogManager::new_with_table(table); - let query_engine = QueryEngineFactory::new(catalog_manager, None, None, false).query_engine(); + let query_engine = + QueryEngineFactory::new(catalog_manager, None, None, None, false).query_engine(); DummyInstance::new(query_engine) } diff --git a/src/session/Cargo.toml b/src/session/Cargo.toml index 1bd7488f2c8f..4f044fdf7c72 100644 --- a/src/session/Cargo.toml +++ b/src/session/Cargo.toml @@ -7,6 +7,9 @@ license.workspace = true [features] testing = [] +[lints] +workspace = true + [dependencies] api.workspace = true arc-swap = "1.5" diff --git a/src/sql/Cargo.toml b/src/sql/Cargo.toml index d72a66721fd1..dcce4b698763 100644 --- a/src/sql/Cargo.toml +++ b/src/sql/Cargo.toml @@ -4,6 +4,9 @@ version.workspace = true edition.workspace = true license.workspace = true +[lints] +workspace = true + [dependencies] api.workspace = true common-base.workspace = true diff --git a/src/store-api/Cargo.toml b/src/store-api/Cargo.toml index 3bc01aa6b579..fc5f2b1690c4 100644 --- a/src/store-api/Cargo.toml +++ b/src/store-api/Cargo.toml @@ 
-4,6 +4,9 @@ version.workspace = true edition.workspace = true license.workspace = true +[lints] +workspace = true + [dependencies] api.workspace = true aquamarine.workspace = true diff --git a/src/table/Cargo.toml b/src/table/Cargo.toml index 2fe1fb85bd2e..17e8d6532ba1 100644 --- a/src/table/Cargo.toml +++ b/src/table/Cargo.toml @@ -7,6 +7,9 @@ license.workspace = true [features] testing = [] +[lints] +workspace = true + [dependencies] anymap = "1.0.0-beta.2" async-trait = "0.1" diff --git a/tests-fuzz/Cargo.toml b/tests-fuzz/Cargo.toml index 0623a4ea29b8..e16e406587c2 100644 --- a/tests-fuzz/Cargo.toml +++ b/tests-fuzz/Cargo.toml @@ -4,6 +4,9 @@ version.workspace = true edition.workspace = true license.workspace = true +[lints] +workspace = true + [dependencies] async-trait = { workspace = true } common-error = { workspace = true } diff --git a/tests-integration/Cargo.toml b/tests-integration/Cargo.toml index 35645b6a2cc2..1914fe720b64 100644 --- a/tests-integration/Cargo.toml +++ b/tests-integration/Cargo.toml @@ -7,6 +7,9 @@ license.workspace = true [features] dashboard = [] +[lints] +workspace = true + [dependencies] api.workspace = true arrow-flight.workspace = true diff --git a/tests-integration/src/cluster.rs b/tests-integration/src/cluster.rs index cace9345ca83..026261551204 100644 --- a/tests-integration/src/cluster.rs +++ b/tests-integration/src/cluster.rs @@ -345,7 +345,7 @@ impl GreptimeDbClusterBuilder { .enable_store() .enable_heartbeat() .channel_manager(meta_srv.channel_manager) - .enable_ddl() + .enable_procedure() .build(); meta_client.start(&[&meta_srv.server_addr]).await.unwrap(); let meta_client = Arc::new(meta_client); diff --git a/tests-integration/tests/region_migration.rs b/tests-integration/tests/region_migration.rs index e1aa74b6bcc2..9831eca35939 100644 --- a/tests-integration/tests/region_migration.rs +++ b/tests-integration/tests/region_migration.rs @@ -19,11 +19,15 @@ use client::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME}; use common_meta::key::{RegionDistribution, TableMetadataManagerRef}; use common_meta::peer::Peer; use common_query::Output; +use common_recordbatch::RecordBatches; use common_telemetry::info; use common_test_util::recordbatch::check_output_stream; use common_test_util::temp_dir::create_temp_dir; use common_wal::config::kafka::{DatanodeKafkaConfig, MetaSrvKafkaConfig}; use common_wal::config::{DatanodeWalConfig, MetaSrvWalConfig}; +use datatypes::prelude::ScalarVector; +use datatypes::value::Value; +use datatypes::vectors::{Helper, UInt64Vector}; use frontend::error::Result as FrontendResult; use frontend::instance::Instance; use futures::future::BoxFuture; @@ -76,6 +80,7 @@ macro_rules! region_migration_tests { $service, test_region_migration, + test_region_migration_by_sql, test_region_migration_multiple_regions, test_region_migration_all_regions, test_region_migration_incorrect_from_peer, @@ -212,6 +217,125 @@ pub async fn test_region_migration(store_type: StorageType, endpoints: Vec) { + let cluster_name = "test_region_migration"; + let peer_factory = |id| Peer { + id, + addr: PEER_PLACEHOLDER_ADDR.to_string(), + }; + + // Prepares test cluster. 
+ let (store_config, _guard) = get_test_store_config(&store_type); + let home_dir = create_temp_dir("test_migration_data_home"); + let datanodes = 5u64; + let builder = GreptimeDbClusterBuilder::new(cluster_name).await; + let const_selector = Arc::new(ConstNodeSelector::new(vec![ + peer_factory(1), + peer_factory(2), + peer_factory(3), + ])); + let cluster = builder + .with_datanodes(datanodes as u32) + .with_store_config(store_config) + .with_wal_config(DatanodeWalConfig::Kafka(DatanodeKafkaConfig { + broker_endpoints: endpoints.clone(), + linger: Duration::from_millis(25), + ..Default::default() + })) + .with_meta_wal_config(MetaSrvWalConfig::Kafka(MetaSrvKafkaConfig { + broker_endpoints: endpoints, + num_topics: 3, + topic_name_prefix: Uuid::new_v4().to_string(), + ..Default::default() + })) + .with_shared_home_dir(Arc::new(home_dir)) + .with_meta_selector(const_selector.clone()) + .build() + .await; + let mut logical_timer = 1685508715000; + + // Prepares test table. + let table_id = prepare_testing_table(&cluster).await; + + // Inserts data + let results = insert_values(&cluster.frontend, logical_timer).await; + logical_timer += 1000; + for result in results { + assert!(matches!(result.unwrap(), Output::AffectedRows(1))); + } + + // The region distribution + let mut distribution = find_region_distribution_by_sql(&cluster).await; + + let old_distribution = distribution.clone(); + + // Selecting target of region migration. + let region_migration_manager = cluster.meta_srv.region_migration_manager(); + let (from_peer_id, from_regions) = distribution.pop_first().unwrap(); + info!( + "Selecting from peer: {from_peer_id}, and regions: {:?}", + from_regions + ); + let (to_peer_id, to_regions) = distribution.pop_first().unwrap(); + info!( + "Selecting to peer: {to_peer_id}, and regions: {:?}", + to_regions + ); + + let region_id = RegionId::new(table_id, from_regions[0]); + // Trigger region migration. + let procedure_id = + trigger_migration_by_sql(&cluster, region_id.as_u64(), from_peer_id, to_peer_id).await; + + info!("Started region procedure: {}!", procedure_id); + + // Waits condition by checking procedure state + let frontend = cluster.frontend.clone(); + wait_condition( + Duration::from_secs(10), + Box::pin(async move { + loop { + let state = query_procedure_by_sql(&frontend, &procedure_id).await; + if state == "{\"status\":\"Done\"}" { + info!("Migration done: {state}"); + break; + } else { + info!("Migration not finished: {state}"); + tokio::time::sleep(Duration::from_millis(200)).await; + } + } + }), + ) + .await; + + // Inserts more table. + let results = insert_values(&cluster.frontend, logical_timer).await; + for result in results { + assert!(matches!(result.unwrap(), Output::AffectedRows(1))); + } + + // Asserts the writes. + assert_values(&cluster.frontend).await; + + // Triggers again. + let procedure = region_migration_manager + .submit_procedure(RegionMigrationProcedureTask::new( + 0, + region_id, + peer_factory(from_peer_id), + peer_factory(to_peer_id), + Duration::from_millis(1000), + )) + .await + .unwrap(); + assert!(procedure.is_none()); + + let new_distribution = find_region_distribution_by_sql(&cluster).await; + + assert_ne!(old_distribution, new_distribution); +} + /// A region migration test for a region server contains multiple regions of the table. 
pub async fn test_region_migration_multiple_regions( store_type: StorageType, @@ -724,12 +848,103 @@ async fn find_region_distribution( .unwrap() } +/// Find region distribution by SQL query +async fn find_region_distribution_by_sql(cluster: &GreptimeDbCluster) -> RegionDistribution { + let query_ctx = QueryContext::arc(); + + let Output::Stream(stream, _) = run_sql( + &cluster.frontend, + &format!(r#"select b.peer_id as datanode_id, + a.greptime_partition_id as region_id + from information_schema.partitions a left join information_schema.greptime_region_peers b + on a.greptime_partition_id = b.region_id + where a.table_name='{TEST_TABLE_NAME}' order by datanode_id asc"# + ), + query_ctx.clone(), + ) + .await.unwrap() else { + unreachable!(); + }; + + let recordbatches = RecordBatches::try_collect(stream).await.unwrap(); + + let mut distribution = RegionDistribution::new(); + + for batch in recordbatches.take() { + let datanode_ids: &UInt64Vector = + unsafe { Helper::static_cast(batch.column_by_name("datanode_id").unwrap()) }; + let region_ids: &UInt64Vector = + unsafe { Helper::static_cast(batch.column_by_name("region_id").unwrap()) }; + + for (datanode_id, region_id) in datanode_ids.iter_data().zip(region_ids.iter_data()) { + let (Some(datanode_id), Some(region_id)) = (datanode_id, region_id) else { + unreachable!(); + }; + + let region_id = RegionId::from_u64(region_id); + distribution + .entry(datanode_id) + .or_default() + .push(region_id.region_number()); + } + } + + distribution +} + +/// Trigger the region migration by SQL, returns the procedure id if success. +async fn trigger_migration_by_sql( + cluster: &GreptimeDbCluster, + region_id: u64, + from_peer_id: u64, + to_peer_id: u64, +) -> String { + let Output::Stream(stream, _) = run_sql( + &cluster.frontend, + &format!("select migrate_region({region_id}, {from_peer_id}, {to_peer_id})"), + QueryContext::arc(), + ) + .await + .unwrap() else { + unreachable!(); + }; + + let recordbatches = RecordBatches::try_collect(stream).await.unwrap(); + + let Value::String(procedure_id) = recordbatches.take()[0].column(0).get(0) else { + unreachable!(); + }; + + procedure_id.as_utf8().to_string() +} + +/// Query procedure state by SQL. +async fn query_procedure_by_sql(instance: &Arc, pid: &str) -> String { + let Output::Stream(stream, _) = run_sql( + instance, + &format!("select procedure_state('{pid}')"), + QueryContext::arc(), + ) + .await + .unwrap() else { + unreachable!(); + }; + + let recordbatches = RecordBatches::try_collect(stream).await.unwrap(); + + let Value::String(state) = recordbatches.take()[0].column(0).get(0) else { + unreachable!(); + }; + + state.as_utf8().to_string() +} + async fn insert_values(instance: &Arc, ts: u64) -> Vec> { let query_ctx = QueryContext::arc(); let mut results = Vec::new(); for range in [5, 15, 55] { - let result = insert_value( + let result = run_sql( instance, &format!("INSERT INTO {TEST_TABLE_NAME} VALUES ({},{})", range, ts), query_ctx.clone(), @@ -741,10 +956,11 @@ async fn insert_values(instance: &Arc, ts: u64) -> Vec, sql: &str, query_ctx: QueryContextRef, ) -> FrontendResult { + info!("Run SQL: {sql}"); instance.do_query(sql, query_ctx).await.remove(0) } diff --git a/tests/runner/Cargo.toml b/tests/runner/Cargo.toml index e6ad0bd84de2..10fdbfeda7a8 100644 --- a/tests/runner/Cargo.toml +++ b/tests/runner/Cargo.toml @@ -4,6 +4,9 @@ version.workspace = true edition.workspace = true license.workspace = true +[lints] +workspace = true + [dependencies] async-trait = "0.1" clap.workspace = true
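Note: every QueryEngineFactory::new / new_with_plugins call site touched above gains one extra argument. A minimal sketch of the updated construction, assuming the same in-memory catalog the tests use and with module paths abbreviated; the fourth argument (the added None) is the new Option<ProcedureServiceHandlerRef> slot:

    fn sample_query_engine() -> query::QueryEngineRef {
        let catalog_manager = catalog::memory::new_memory_catalog_manager().unwrap();
        // Arguments: catalog manager, region query handler, table mutation handler,
        // procedure service handler (the slot added by this change), dist-planner flag.
        query::QueryEngineFactory::new(catalog_manager, None, None, None, false).query_engine()
    }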
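The header module rework keeps the legacy string values under http::header::constants and additionally exposes each name as a static HeaderName, so response-building code inserts by reference instead of re-parsing the string on every call. A small sketch mirroring the call sites above (tag_csv_format is a hypothetical helper, not part of this patch):

    use axum::http::{HeaderMap, HeaderValue};

    use crate::http::header::{constants, GREPTIME_DB_HEADER_FORMAT};

    fn tag_csv_format(headers: &mut HeaderMap) {
        // The string constant is unchanged, for code that still needs a &str.
        debug_assert_eq!(constants::GREPTIME_DB_HEADER_FORMAT, "x-greptime-format");
        // The pre-built HeaderName is inserted by reference; HeaderMap::insert accepts &HeaderName.
        headers.insert(&GREPTIME_DB_HEADER_FORMAT, HeaderValue::from_static("CSV"));
    }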
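Stripped of cluster setup and assertions, the SQL surface exercised by the new test_region_migration_by_sql is two queries: migrate_region(region_id, from_peer, to_peer), which returns a procedure id, and procedure_state('<id>'), which returns the serialized state the test polls until it reads {"status":"Done"}. A rough sketch using the run_sql helper from this patch (the ids and the procedure id below are placeholders):

    async fn migrate_by_sql_sketch(frontend: &Arc<Instance>) {
        // Start migrating region 1 from datanode 1 to datanode 2 (placeholder ids);
        // the single result cell holds the procedure id of the started migration.
        let _start = run_sql(frontend, "select migrate_region(1, 1, 2)", QueryContext::arc())
            .await
            .unwrap();

        // Poll the procedure state until it reports Done, as wait_condition does above.
        let _state = run_sql(
            frontend,
            "select procedure_state('<procedure id from the previous query>')",
            QueryContext::arc(),
        )
        .await
        .unwrap();
    }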