From 4c596546a00b9e5e16279a4141b2aa964f79148f Mon Sep 17 00:00:00 2001 From: Kexiang Wang Date: Mon, 13 May 2024 23:08:46 +0800 Subject: [PATCH 01/11] =?UTF-8?q?feat(catalog):=20add=20has=5Ftable=5Fpriv?= =?UTF-8?q?ilege,=20has=5Fschema=5Fprivilege,=20has=5Fany=E2=80=A6=20(#166?= =?UTF-8?q?74)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- e2e_test/batch/catalog/has_privilege.slt.part | 228 ++++++++++++++++++ proto/expr.proto | 3 + src/frontend/src/binder/expr/function.rs | 45 ++++ src/frontend/src/catalog/database_catalog.rs | 11 + src/frontend/src/catalog/schema_catalog.rs | 18 ++ .../src/expr/function_impl/has_privilege.rs | 189 +++++++++++++++ src/frontend/src/expr/function_impl/mod.rs | 1 + src/frontend/src/expr/pure.rs | 3 + .../src/optimizer/plan_expr_visitor/strong.rs | 3 + src/frontend/src/user/user_catalog.rs | 32 ++- 10 files changed, 532 insertions(+), 1 deletion(-) create mode 100644 e2e_test/batch/catalog/has_privilege.slt.part create mode 100644 src/frontend/src/expr/function_impl/has_privilege.rs diff --git a/e2e_test/batch/catalog/has_privilege.slt.part b/e2e_test/batch/catalog/has_privilege.slt.part new file mode 100644 index 0000000000000..21c12bac45593 --- /dev/null +++ b/e2e_test/batch/catalog/has_privilege.slt.part @@ -0,0 +1,228 @@ +statement ok +CREATE USER test_user; + +statement ok +CREATE SCHEMA test_schema; + +statement ok +CREATE TABLE foo (id INT, name VARCHAR); + +statement ok +CREATE VIEW foo_view AS SELECT * FROM foo; + +statement ok +CREATE INDEX foo_index ON foo(id); + +statement ok +CREATE MATERIALIZED VIEW foo_mv AS SELECT * FROM foo; + +statement ok +CREATE SOURCE foo_source (a int, b int) with ( + connector = 'datagen', + datagen.rows.per.second = '1', + datagen.split.num = '1' +); + +statement ok +CREATE TABLE bar (id INT); + +statement ok +GRANT ALL PRIVILEGES ON foo TO test_user GRANTED BY root; + +statement ok +GRANT INSERT ON bar TO test_user WITH GRANT OPTION GRANTED BY root; + +statement ok +GRANT SELECT ON ALL TABLES IN SCHEMA public TO test_user WITH GRANT OPTION GRANTED BY root; + +statement ok +GRANT SELECT ON ALL MATERIALIZED VIEWS IN SCHEMA public TO test_user WITH GRANT OPTION GRANTED BY root; + +statement ok +GRANT SELECT ON ALL SOURCES IN SCHEMA public TO test_user WITH GRANT OPTION GRANTED BY root; + +statement ok +GRANT CREATE ON SCHEMA test_schema TO test_user; + +query error Invalid parameter user: User test_user_err not found +SELECT has_table_privilege('test_user_err', 'foo', 'SELECT'); + +query error Invalid parameter name: class not found: foo_err +SELECT has_table_privilege('test_user', 'foo_err', 'SELECT'); + +query error Invalid parameter privilege: unrecognized privilege type: "SELE CT" +SELECT has_table_privilege('test_user', 'foo', 'SELE CT'); + +query error Invalid parameter privilege: unrecognized privilege type: "SELECT INSERT" +SELECT has_table_privilege('test_user', 'foo', 'SELECT INSERT'); + +query error Invalid parameter privilege +SELECT has_table_privilege('test_user', 'foo', 'SELECT, INSERT WITH GRANT OPTION'); + +query error Invalid parameter user: User test_user_err not found +SELECT has_schema_privilege('test_user_err', 'test_schema', 'CREATE'); + +query error Invalid parameter schema: schema not found: test_schema_err +SELECT has_schema_privilege('test_user', 'test_schema_err', 'CREATE'); + +query error Invalid parameter privilege: unrecognized privilege type: "INSERT" +SELECT has_schema_privilege('test_user', 'test_schema', 'INSERT'); + +query error Invalid parameter privilege: unrecognized privilege type: "DELETE" +SELECT has_any_column_privilege('test_user', 'foo_mv'::regclass, 'DELETE'); + +query I +SELECT has_table_privilege('test_user', 'foo', 'SELECT'); +---- +t + +query I +SELECT has_table_privilege('test_user', 'foo', 'SELECT WITH GRANT OPTION'); +---- +t + +query I +SELECT has_table_privilege('test_user', 'foo', 'INSERT WITH GRANT OPTION'); +---- +f + +query I +SELECT has_table_privilege('test_user', 'foo', 'INSERT, SELECT WITH GRANT OPTION'); +---- +t + +query I +SELECT has_table_privilege('test_user', 'foo', 'DELETE, INSERT, SELECT WITH GRANT OPTION'); +---- +t + +query I +SELECT has_table_privilege('test_user', 'foo', 'DELETE WITH GRANT OPTION, INSERT, SELECT WITH GRANT OPTION'); +---- +f + +# FIXME(Kexiang): Currently, RW's grant privilege on all table doesn't apply to VIEWS. +query I +SELECT has_table_privilege('test_user', 'foo_view', 'SELECT'); +---- +f + +query I +SELECT has_table_privilege('test_user', 'foo_view'::regclass, 'INSERT'); +---- +f + +query I +SELECT has_any_column_privilege('test_user', 'foo_view'::regclass, 'INSERT'); +---- +f + +query I +SELECT has_table_privilege('test_user', 'foo_mv', 'SELECT'); +---- +t + +query I +SELECT has_table_privilege('test_user', 'foo_mv'::regclass, 'SELECT WITH GRANT OPTION'); +---- +t + +query I +SELECT has_any_column_privilege('test_user', 'foo_mv'::regclass, 'SELECT WITH GRANT OPTION'); +---- +t + +query I +SELECT has_table_privilege('test_user', 'foo_mv', 'INSERT'); +---- +f + +query I +SELECT has_table_privilege('test_user', 'foo_source'::regclass, 'SELECT'); +---- +t + +query I +SELECT has_table_privilege('test_user', 'foo_source', 'INSERT'); +---- +f + +# Indexes are granted by `GRANT SELECT ON ALL MATERIALIZED VIEWS` +query I +SELECT has_table_privilege('test_user', 'foo_index'::regclass, 'SELECT'); +---- +t + +query I +SELECT has_table_privilege('test_user', 'foo_index', 'INSERT'); +---- +f + +query I +SELECT has_table_privilege('test_user', 'bar', 'INSERT'); +---- +t + +query I +SELECT has_table_privilege('bar', 'INSERT'); +---- +t + +query I +SELECT has_table_privilege('bar'::regclass, 'SELECT'); +---- +t + +query I +SELECT has_table_privilege('bar'::regclass, 'SELECT'); +---- +t + +query I +SELECT has_table_privilege('test_user', 'bar', 'UPDATE'); +---- +f + +query I +SELECT has_table_privilege('test_user', 'bar'::regclass, 'INSERT WITH GRANT OPTION'); +---- +t + +query I +SELECT has_schema_privilege('public', 'USAGE'); +---- +t + +query I +SELECT has_schema_privilege('test_user', 'test_schema', 'USAGE'); +---- +f + +query I +SELECT has_schema_privilege('test_user', 'test_schema', 'CREATE'); +---- +t + +statement ok +DROP SOURCE foo_source; + +statement ok +DROP MATERIALIZED VIEW foo_mv; + +statement ok +DROP INDEX foo_index; + +statement ok +DROP VIEW foo_view; + +statement ok +DROP TABLE foo; + +statement ok +DROP TABLE bar; + +statement ok +DROP SCHEMA test_schema; + +statement ok +DROP USER test_user; diff --git a/proto/expr.proto b/proto/expr.proto index c8a9887c1663e..1ba1f052eb9c4 100644 --- a/proto/expr.proto +++ b/proto/expr.proto @@ -299,6 +299,9 @@ message ExprNode { PG_INDEXES_SIZE = 2404; PG_RELATION_SIZE = 2405; PG_GET_SERIAL_SEQUENCE = 2406; + HAS_TABLE_PRIVILEGE = 2407; + HAS_ANY_COLUMN_PRIVILEGE = 2408; + HAS_SCHEMA_PRIVILEGE = 2409; // EXTERNAL ICEBERG_TRANSFORM = 2201; diff --git a/src/frontend/src/binder/expr/function.rs b/src/frontend/src/binder/expr/function.rs index 2134e08fe8c66..393e27081572c 100644 --- a/src/frontend/src/binder/expr/function.rs +++ b/src/frontend/src/binder/expr/function.rs @@ -1303,6 +1303,51 @@ impl Binder { ("pg_get_partkeydef", raw_literal(ExprImpl::literal_null(DataType::Varchar))), ("pg_encoding_to_char", raw_literal(ExprImpl::literal_varchar("UTF8".into()))), ("has_database_privilege", raw_literal(ExprImpl::literal_bool(true))), + ("has_table_privilege", raw(|binder, mut inputs|{ + if inputs.len() == 2 { + inputs.insert(0, ExprImpl::literal_varchar(binder.auth_context.user_name.clone())); + } + if inputs.len() == 3 { + if inputs[1].return_type() == DataType::Varchar { + inputs[1].cast_to_regclass_mut()?; + } + Ok(FunctionCall::new(ExprType::HasTablePrivilege, inputs)?.into()) + } else { + Err(ErrorCode::ExprError( + "Too many/few arguments for pg_catalog.has_table_privilege()".into(), + ) + .into()) + } + })), + ("has_any_column_privilege", raw(|binder, mut inputs|{ + if inputs.len() == 2 { + inputs.insert(0, ExprImpl::literal_varchar(binder.auth_context.user_name.clone())); + } + if inputs.len() == 3 { + if inputs[1].return_type() == DataType::Varchar { + inputs[1].cast_to_regclass_mut()?; + } + Ok(FunctionCall::new(ExprType::HasAnyColumnPrivilege, inputs)?.into()) + } else { + Err(ErrorCode::ExprError( + "Too many/few arguments for pg_catalog.has_any_column_privilege()".into(), + ) + .into()) + } + })), + ("has_schema_privilege", raw(|binder, mut inputs|{ + if inputs.len() == 2 { + inputs.insert(0, ExprImpl::literal_varchar(binder.auth_context.user_name.clone())); + } + if inputs.len() == 3 { + Ok(FunctionCall::new(ExprType::HasSchemaPrivilege, inputs)?.into()) + } else { + Err(ErrorCode::ExprError( + "Too many/few arguments for pg_catalog.has_schema_privilege()".into(), + ) + .into()) + } + })), ("pg_stat_get_numscans", raw_literal(ExprImpl::literal_bigint(0))), ("pg_backend_pid", raw(|binder, _inputs| { // FIXME: the session id is not global unique in multi-frontend env. diff --git a/src/frontend/src/catalog/database_catalog.rs b/src/frontend/src/catalog/database_catalog.rs index ec040e309eba8..c562e5fc77a86 100644 --- a/src/frontend/src/catalog/database_catalog.rs +++ b/src/frontend/src/catalog/database_catalog.rs @@ -17,6 +17,7 @@ use std::collections::HashMap; use itertools::Itertools; use risingwave_common::catalog::PG_CATALOG_SCHEMA_NAME; use risingwave_pb::catalog::{PbDatabase, PbSchema}; +use risingwave_pb::user::grant_privilege::Object; use super::OwnedByUserCatalog; use crate::catalog::schema_catalog::SchemaCatalog; @@ -99,6 +100,16 @@ impl DatabaseCatalog { .find(|schema| schema.get_table_by_id(table_id).is_some()) } + pub fn get_grant_object_by_oid(&self, oid: u32) -> Option { + for schema in self.schema_by_name.values() { + let object = schema.get_grant_object_by_oid(oid); + if object.is_some() { + return object; + } + } + None + } + pub fn update_schema(&mut self, prost: &PbSchema) { let id = prost.id; let name = prost.name.clone(); diff --git a/src/frontend/src/catalog/schema_catalog.rs b/src/frontend/src/catalog/schema_catalog.rs index 56de1d743da41..20a99ad820af8 100644 --- a/src/frontend/src/catalog/schema_catalog.rs +++ b/src/frontend/src/catalog/schema_catalog.rs @@ -24,6 +24,7 @@ pub use risingwave_expr::sig::*; use risingwave_pb::catalog::{ PbConnection, PbFunction, PbIndex, PbSchema, PbSink, PbSource, PbSubscription, PbTable, PbView, }; +use risingwave_pb::user::grant_privilege::Object; use super::subscription_catalog::SubscriptionCatalog; use super::{OwnedByUserCatalog, SubscriptionId}; @@ -703,6 +704,23 @@ impl SchemaCatalog { .map(|s| s.to_owned()) } + pub fn get_grant_object_by_oid(&self, oid: u32) -> Option { + #[allow(clippy::manual_map)] + if self.get_table_by_id(&TableId::new(oid)).is_some() + || self.get_index_by_id(&IndexId::new(oid)).is_some() + { + Some(Object::TableId(oid)) + } else if self.get_source_by_id(&oid).is_some() { + Some(Object::SourceId(oid)) + } else if self.get_sink_by_id(&oid).is_some() { + Some(Object::SinkId(oid)) + } else if self.get_view_by_id(&oid).is_some() { + Some(Object::ViewId(oid)) + } else { + None + } + } + pub fn id(&self) -> SchemaId { self.id } diff --git a/src/frontend/src/expr/function_impl/has_privilege.rs b/src/frontend/src/expr/function_impl/has_privilege.rs new file mode 100644 index 0000000000000..735a9dd1b6d92 --- /dev/null +++ b/src/frontend/src/expr/function_impl/has_privilege.rs @@ -0,0 +1,189 @@ +// Copyright 2024 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::collections::HashSet; + +use risingwave_expr::{capture_context, function, ExprError, Result}; +use risingwave_pb::user::grant_privilege::{Action, Object}; +use thiserror_ext::AsReport; + +use super::context::{CATALOG_READER, DB_NAME, USER_INFO_READER}; +use crate::catalog::CatalogReader; +use crate::user::user_service::UserInfoReader; + +#[inline(always)] +pub fn user_not_found_err(inner_err: &str) -> ExprError { + ExprError::InvalidParam { + name: "user", + reason: inner_err.into(), + } +} + +#[function("has_table_privilege(int4, int4, varchar) -> boolean")] +fn has_table_privilege(user_id: i32, table_oid: i32, privileges: &str) -> Result { + // does user have privilege for table + let user_name = get_user_name_by_id_captured(user_id)?; + has_table_privilege_1(user_name.as_str(), table_oid, privileges) +} + +#[function("has_table_privilege(varchar, int4, varchar) -> boolean")] +fn has_table_privilege_1(user_name: &str, table_oid: i32, privileges: &str) -> Result { + let allowed_actions = HashSet::new(); + let actions = parse_privilege(privileges, &allowed_actions)?; + // currently, we haven't support grant for view. + has_privilege_impl_captured( + user_name, + &get_grant_object_by_oid_captured(table_oid)?, + &actions, + ) +} + +#[function("has_any_column_privilege(int4, int4, varchar) -> boolean")] +fn has_any_column_privilege(user_id: i32, table_oid: i32, privileges: &str) -> Result { + // does user have privilege for any column of table + let user_name = get_user_name_by_id_captured(user_id)?; + has_any_column_privilege_1(user_name.as_str(), table_oid, privileges) +} + +#[function("has_any_column_privilege(varchar, int4, varchar) -> boolean")] +fn has_any_column_privilege_1(user_name: &str, table_oid: i32, privileges: &str) -> Result { + let allowed_actions = HashSet::from_iter([Action::Select, Action::Insert, Action::Update]); + let actions = parse_privilege(privileges, &allowed_actions)?; + has_privilege_impl_captured( + user_name, + &get_grant_object_by_oid_captured(table_oid)?, + &actions, + ) +} + +#[function("has_schema_privilege(varchar, int4, varchar) -> boolean")] +fn has_schema_privilege(user_name: &str, schema_oid: i32, privileges: &str) -> Result { + // does user have privilege for schema + let allowed_actions = HashSet::from_iter([Action::Create, Action::Usage]); + let actions = parse_privilege(privileges, &allowed_actions)?; + has_privilege_impl_captured(user_name, &Object::SchemaId(schema_oid as u32), &actions) +} + +#[function("has_schema_privilege(int4, varchar, varchar) -> boolean")] +fn has_schema_privilege_1(user_id: i32, schema_name: &str, privileges: &str) -> Result { + let user_name = get_user_name_by_id_captured(user_id)?; + let schema_oid = get_schema_id_by_name_captured(schema_name)?; + has_schema_privilege(user_name.as_str(), schema_oid, privileges) +} + +#[function("has_schema_privilege(int4, int4, varchar) -> boolean")] +fn has_schema_privilege_2(user_id: i32, schema_oid: i32, privileges: &str) -> Result { + let user_name = get_user_name_by_id_captured(user_id)?; + has_schema_privilege(user_name.as_str(), schema_oid, privileges) +} + +#[function("has_schema_privilege(varchar, varchar, varchar) -> boolean")] +fn has_schema_privilege_3(user_name: &str, schema_name: &str, privileges: &str) -> Result { + let schema_oid = get_schema_id_by_name_captured(schema_name)?; + has_schema_privilege(user_name, schema_oid, privileges) +} + +#[capture_context(USER_INFO_READER)] +fn has_privilege_impl( + user_info_reader: &UserInfoReader, + user_name: &str, + object: &Object, + actions: &Vec<(Action, bool)>, +) -> Result { + let user_info = &user_info_reader.read_guard(); + let user_catalog = user_info + .get_user_by_name(user_name) + .ok_or(user_not_found_err( + format!("User {} not found", user_name).as_str(), + ))?; + Ok(user_catalog.check_privilege_with_grant_option(object, actions)) +} + +#[capture_context(USER_INFO_READER)] +fn get_user_name_by_id(user_info_reader: &UserInfoReader, user_id: i32) -> Result { + let user_info = &user_info_reader.read_guard(); + user_info + .get_user_name_by_id(user_id as u32) + .ok_or(user_not_found_err( + format!("User {} not found", user_id).as_str(), + )) +} + +#[capture_context(CATALOG_READER, DB_NAME)] +fn get_grant_object_by_oid( + catalog_reader: &CatalogReader, + db_name: &str, + oid: i32, +) -> Result { + catalog_reader + .read_guard() + .get_database_by_name(db_name) + .map_err(|e| ExprError::InvalidParam { + name: "oid", + reason: e.to_report_string().into(), + })? + .get_grant_object_by_oid(oid as u32) + .ok_or(ExprError::InvalidParam { + name: "oid", + reason: format!("Table {} not found", oid).as_str().into(), + }) +} + +#[capture_context(CATALOG_READER, DB_NAME)] +fn get_schema_id_by_name( + catalog_reader: &CatalogReader, + db_name: &str, + schema_name: &str, +) -> Result { + let reader = &catalog_reader.read_guard(); + Ok(reader + .get_schema_by_name(db_name, schema_name) + .map_err(|e| ExprError::InvalidParam { + name: "schema", + reason: e.to_report_string().into(), + })? + .id() as i32) +} + +fn parse_privilege( + privilege_string: &str, + allowed_actions: &HashSet, +) -> Result> { + let mut privileges = Vec::new(); + for part in privilege_string.split(',').map(str::trim) { + let (privilege_type, grant_option) = match part.rsplit_once(" WITH GRANT OPTION") { + Some((p, _)) => (p, true), + None => (part, false), + }; + match Action::from_str_name(privilege_type.to_uppercase().as_str()) { + Some(Action::Unspecified) | None => { + return Err(ExprError::InvalidParam { + name: "privilege", + reason: format!("unrecognized privilege type: \"{}\"", part).into(), + }) + } + Some(action) => { + if allowed_actions.is_empty() || allowed_actions.contains(&action) { + privileges.push((action, grant_option)) + } else { + return Err(ExprError::InvalidParam { + name: "privilege", + reason: format!("unrecognized privilege type: \"{}\"", part).into(), + }); + } + } + } + } + Ok(privileges) +} diff --git a/src/frontend/src/expr/function_impl/mod.rs b/src/frontend/src/expr/function_impl/mod.rs index ad0b3b7a853d8..3a70ebf5db474 100644 --- a/src/frontend/src/expr/function_impl/mod.rs +++ b/src/frontend/src/expr/function_impl/mod.rs @@ -15,6 +15,7 @@ mod cast_regclass; mod col_description; pub mod context; +mod has_privilege; mod pg_get_indexdef; mod pg_get_userbyid; mod pg_get_viewdef; diff --git a/src/frontend/src/expr/pure.rs b/src/frontend/src/expr/pure.rs index bc18959e5be55..d80ef31b04fde 100644 --- a/src/frontend/src/expr/pure.rs +++ b/src/frontend/src/expr/pure.rs @@ -267,6 +267,9 @@ impl ExprVisitor for ImpureAnalyzer { | Type::PgIndexesSize | Type::PgRelationSize | Type::PgGetSerialSequence + | Type::HasTablePrivilege + | Type::HasAnyColumnPrivilege + | Type::HasSchemaPrivilege | Type::MakeTimestamptz => self.impure = true, } } diff --git a/src/frontend/src/optimizer/plan_expr_visitor/strong.rs b/src/frontend/src/optimizer/plan_expr_visitor/strong.rs index e30cdb0b6e314..7045bb226efd7 100644 --- a/src/frontend/src/optimizer/plan_expr_visitor/strong.rs +++ b/src/frontend/src/optimizer/plan_expr_visitor/strong.rs @@ -301,6 +301,9 @@ impl Strong { | ExprType::PgRelationSize | ExprType::PgGetSerialSequence | ExprType::IcebergTransform + | ExprType::HasTablePrivilege + | ExprType::HasAnyColumnPrivilege + | ExprType::HasSchemaPrivilege | ExprType::InetAton | ExprType::InetNtoa => false, ExprType::Unspecified => unreachable!(), diff --git a/src/frontend/src/user/user_catalog.rs b/src/frontend/src/user/user_catalog.rs index a33e9423fc6ea..319d60fc56ae2 100644 --- a/src/frontend/src/user/user_catalog.rs +++ b/src/frontend/src/user/user_catalog.rs @@ -16,7 +16,7 @@ use std::collections::hash_map::Entry; use std::collections::HashMap; use risingwave_common::acl::{AclMode, AclModeSet}; -use risingwave_pb::user::grant_privilege::{Object as GrantObject, Object}; +use risingwave_pb::user::grant_privilege::{Action, Object as GrantObject, Object}; use risingwave_pb::user::{PbAuthInfo, PbGrantPrivilege, PbUserInfo}; use crate::catalog::{DatabaseId, SchemaId}; @@ -168,4 +168,34 @@ impl UserCatalog { self.get_acl(object) .map_or(false, |acl_set| acl_set.has_mode(mode)) } + + pub fn check_privilege_with_grant_option( + &self, + object: &GrantObject, + actions: &Vec<(Action, bool)>, + ) -> bool { + if self.is_super { + return true; + } + let mut action_map: HashMap<_, _> = actions.iter().map(|action| (action, false)).collect(); + + for privilege in &self.grant_privileges { + if privilege.get_object().unwrap() != object { + continue; + } + for awo in &privilege.action_with_opts { + let action = awo.get_action().unwrap(); + let with_grant_option = awo.with_grant_option; + + for (&key, found) in &mut action_map { + let (required_action, required_grant_option) = *key; + + if action == required_action && (!required_grant_option | with_grant_option) { + *found = true; + } + } + } + } + action_map.values().all(|&found| found) + } } From 39075f7b8ee2cd764201fbd04ef78c7a49f08c0e Mon Sep 17 00:00:00 2001 From: Kexiang Wang Date: Tue, 14 May 2024 01:01:48 +0800 Subject: [PATCH 02/11] fix(catalog): grant view when grant table (#16699) --- e2e_test/batch/catalog/has_privilege.slt.part | 42 +++++++++++++++++-- proto/user.proto | 2 +- src/common/src/acl/mod.rs | 2 +- src/frontend/src/handler/handle_privilege.rs | 35 +++++++++++----- src/frontend/src/user/user_catalog.rs | 1 - src/frontend/src/user/user_privilege.rs | 2 +- src/meta/service/src/user_service.rs | 23 +++++++++- src/meta/src/controller/catalog.rs | 15 ++++++- src/meta/src/controller/utils.rs | 3 +- src/meta/src/manager/catalog/database.rs | 8 ++++ src/meta/src/manager/catalog/mod.rs | 4 ++ 11 files changed, 116 insertions(+), 21 deletions(-) diff --git a/e2e_test/batch/catalog/has_privilege.slt.part b/e2e_test/batch/catalog/has_privilege.slt.part index 21c12bac45593..a742db0c51d6f 100644 --- a/e2e_test/batch/catalog/has_privilege.slt.part +++ b/e2e_test/batch/catalog/has_privilege.slt.part @@ -32,6 +32,9 @@ GRANT ALL PRIVILEGES ON foo TO test_user GRANTED BY root; statement ok GRANT INSERT ON bar TO test_user WITH GRANT OPTION GRANTED BY root; +statement ok +GRANT INSERT ON foo_view TO test_user WITH GRANT OPTION GRANTED BY root; + statement ok GRANT SELECT ON ALL TABLES IN SCHEMA public TO test_user WITH GRANT OPTION GRANTED BY root; @@ -44,6 +47,9 @@ GRANT SELECT ON ALL SOURCES IN SCHEMA public TO test_user WITH GRANT OPTION GRAN statement ok GRANT CREATE ON SCHEMA test_schema TO test_user; +query error table not found: bar_err +GRANT INSERT ON bar_err TO test_user WITH GRANT OPTION GRANTED BY root; + query error Invalid parameter user: User test_user_err not found SELECT has_table_privilege('test_user_err', 'foo', 'SELECT'); @@ -101,21 +107,25 @@ SELECT has_table_privilege('test_user', 'foo', 'DELETE WITH GRANT OPTION, INSERT ---- f -# FIXME(Kexiang): Currently, RW's grant privilege on all table doesn't apply to VIEWS. query I SELECT has_table_privilege('test_user', 'foo_view', 'SELECT'); ---- -f +t query I SELECT has_table_privilege('test_user', 'foo_view'::regclass, 'INSERT'); ---- +t + +query I +SELECT has_table_privilege('test_user', 'foo_view'::regclass, 'UPDATE'); +---- f query I SELECT has_any_column_privilege('test_user', 'foo_view'::regclass, 'INSERT'); ---- -f +t query I SELECT has_table_privilege('test_user', 'foo_mv', 'SELECT'); @@ -203,6 +213,32 @@ SELECT has_schema_privilege('test_user', 'test_schema', 'CREATE'); ---- t +statement ok +REVOKE SELECT ON ALL TABLES IN SCHEMA public FROM test_user GRANTED BY root; + +query I +SELECT has_table_privilege('test_user', 'bar'::regclass, 'SELECT'); +---- +f + +query I +SELECT has_table_privilege('test_user', 'foo_view', 'SELECT'); +---- +f + +query I +SELECT has_table_privilege('test_user', 'foo_view', 'INSERT'); +---- +t + +statement ok +REVOKE INSERT ON foo_view FROM test_user GRANTED BY root; + +query I +SELECT has_table_privilege('test_user', 'foo_view', 'INSERT'); +---- +f + statement ok DROP SOURCE foo_source; diff --git a/proto/user.proto b/proto/user.proto index 383e78cb57b28..6218fea4fd98d 100644 --- a/proto/user.proto +++ b/proto/user.proto @@ -66,7 +66,7 @@ message GrantPrivilege { uint32 all_tables_schema_id = 11; uint32 all_sources_schema_id = 12; - uint32 all_dml_tables_schema_id = 13; + uint32 all_dml_relations_schema_id = 13; uint32 subscription_id = 14; } repeated ActionWithGrantOption action_with_opts = 7; diff --git a/src/common/src/acl/mod.rs b/src/common/src/acl/mod.rs index 91f69fea51310..391ad0e5781f4 100644 --- a/src/common/src/acl/mod.rs +++ b/src/common/src/acl/mod.rs @@ -100,11 +100,11 @@ pub static ALL_AVAILABLE_DATABASE_MODES: LazyLock = LazyLock::new(|| make_bitflags!(AclMode::{Create | Connect}).into()); pub static ALL_AVAILABLE_SCHEMA_MODES: LazyLock = LazyLock::new(|| make_bitflags!(AclMode::{Create | Usage}).into()); +// Including TABLES and VIEWS pub static ALL_AVAILABLE_TABLE_MODES: LazyLock = LazyLock::new(|| make_bitflags!(AclMode::{Select | Insert | Update | Delete}).into()); pub static ALL_AVAILABLE_SOURCE_MODES: LazyLock = LazyLock::new(AclModeSet::readonly); pub static ALL_AVAILABLE_MVIEW_MODES: LazyLock = LazyLock::new(AclModeSet::readonly); -pub static ALL_AVAILABLE_VIEW_MODES: LazyLock = LazyLock::new(AclModeSet::readonly); pub static ALL_AVAILABLE_SINK_MODES: LazyLock = LazyLock::new(AclModeSet::empty); pub static ALL_AVAILABLE_SUBSCRIPTION_MODES: LazyLock = LazyLock::new(AclModeSet::empty); diff --git a/src/frontend/src/handler/handle_privilege.rs b/src/frontend/src/handler/handle_privilege.rs index 44e5b8fdac33c..e8a37b0ab407b 100644 --- a/src/frontend/src/handler/handle_privilege.rs +++ b/src/frontend/src/handler/handle_privilege.rs @@ -21,6 +21,7 @@ use super::RwPgResponse; use crate::binder::Binder; use crate::catalog::root_catalog::SchemaPath; use crate::catalog::table_catalog::TableType; +use crate::catalog::CatalogError; use crate::error::{ErrorCode, Result}; use crate::handler::HandlerArgs; use crate::session::SessionImpl; @@ -93,17 +94,31 @@ fn make_prost_privilege( Binder::resolve_schema_qualified_name(db_name, name)?; let schema_path = SchemaPath::new(schema_name.as_deref(), &search_path, user_name); - let (table, _) = reader.get_table_by_name(db_name, schema_path, &table_name)?; - match table.table_type() { - TableType::Table => {} - _ => { - return Err(ErrorCode::InvalidInputSyntax(format!( - "{table_name} is not a table", - )) - .into()); + match reader.get_table_by_name(db_name, schema_path, &table_name) { + Ok((table, _)) => { + match table.table_type() { + TableType::Table => { + grant_objs.push(PbObject::TableId(table.id().table_id)); + continue; + } + _ => { + return Err(ErrorCode::InvalidInputSyntax(format!( + "{table_name} is not a table", + )) + .into()); + } + }; + } + Err(CatalogError::NotFound("table", _)) => { + let (view, _) = reader + .get_view_by_name(db_name, schema_path, &table_name) + .map_err(|_| CatalogError::NotFound("table", table_name))?; + grant_objs.push(PbObject::ViewId(view.id)); + } + Err(e) => { + return Err(e.into()); } } - grant_objs.push(PbObject::TableId(table.id().table_id)); } } GrantObjects::Sources(sources) => { @@ -138,7 +153,7 @@ fn make_prost_privilege( for schema in schemas { let schema_name = Binder::resolve_schema_name(schema)?; let schema = reader.get_schema_by_name(session.database(), &schema_name)?; - grant_objs.push(PbObject::AllDmlTablesSchemaId(schema.id())); + grant_objs.push(PbObject::AllDmlRelationsSchemaId(schema.id())); } } o => { diff --git a/src/frontend/src/user/user_catalog.rs b/src/frontend/src/user/user_catalog.rs index 319d60fc56ae2..905348d37dfa6 100644 --- a/src/frontend/src/user/user_catalog.rs +++ b/src/frontend/src/user/user_catalog.rs @@ -189,7 +189,6 @@ impl UserCatalog { for (&key, found) in &mut action_map { let (required_action, required_grant_option) = *key; - if action == required_action && (!required_grant_option | with_grant_option) { *found = true; } diff --git a/src/frontend/src/user/user_privilege.rs b/src/frontend/src/user/user_privilege.rs index 2b474bda0554e..d4ad71e64120d 100644 --- a/src/frontend/src/user/user_privilege.rs +++ b/src/frontend/src/user/user_privilege.rs @@ -94,7 +94,7 @@ pub fn available_prost_privilege(object: PbObject, for_dml_table: bool) -> PbGra &acl::ALL_AVAILABLE_MVIEW_MODES } } - PbObject::ViewId(_) => &acl::ALL_AVAILABLE_VIEW_MODES, + PbObject::ViewId(_) => &acl::ALL_AVAILABLE_TABLE_MODES, PbObject::SinkId(_) => &acl::ALL_AVAILABLE_SINK_MODES, PbObject::SubscriptionId(_) => &acl::ALL_AVAILABLE_SUBSCRIPTION_MODES, PbObject::FunctionId(_) => &acl::ALL_AVAILABLE_FUNCTION_MODES, diff --git a/src/meta/service/src/user_service.rs b/src/meta/service/src/user_service.rs index d7dd4f5b4de4b..cab1b29228055 100644 --- a/src/meta/service/src/user_service.rs +++ b/src/meta/service/src/user_service.rs @@ -76,7 +76,7 @@ impl UserServiceImpl { } expanded_privileges.push(privilege); } - } else if let Some(Object::AllDmlTablesSchemaId(schema_id)) = &privilege.object { + } else if let Some(Object::AllDmlRelationsSchemaId(schema_id)) = &privilege.object { let tables = match &self.metadata_manager { MetadataManager::V1(mgr) => { mgr.catalog_manager.list_dml_table_ids(*schema_id).await @@ -89,6 +89,16 @@ impl UserServiceImpl { .map(|id| id as _) .collect(), }; + let views = match &self.metadata_manager { + MetadataManager::V1(mgr) => mgr.catalog_manager.list_view_ids(*schema_id).await, + MetadataManager::V2(mgr) => mgr + .catalog_controller + .list_view_ids(*schema_id as _) + .await? + .into_iter() + .map(|id| id as _) + .collect(), + }; for table_id in tables { let mut privilege = privilege.clone(); privilege.object = Some(Object::TableId(table_id)); @@ -100,6 +110,17 @@ impl UserServiceImpl { } expanded_privileges.push(privilege); } + for view_id in views { + let mut privilege = privilege.clone(); + privilege.object = Some(Object::ViewId(view_id)); + if let Some(true) = with_grant_option { + privilege + .action_with_opts + .iter_mut() + .for_each(|p| p.with_grant_option = true); + } + expanded_privileges.push(privilege); + } } else if let Some(Object::AllSourcesSchemaId(schema_id)) = &privilege.object { let sources = match &self.metadata_manager { MetadataManager::V1(mgr) => { diff --git a/src/meta/src/controller/catalog.rs b/src/meta/src/controller/catalog.rs index af1853aec366d..b8d88fabad220 100644 --- a/src/meta/src/controller/catalog.rs +++ b/src/meta/src/controller/catalog.rs @@ -31,7 +31,7 @@ use risingwave_meta_model_v2::{ ActorUpstreamActors, ColumnCatalogArray, ConnectionId, CreateType, DatabaseId, FragmentId, FunctionId, I32Array, IndexId, JobStatus, ObjectId, PrivateLinkService, Property, SchemaId, SinkId, SourceId, StreamNode, StreamSourceInfo, StreamingParallelism, SubscriptionId, TableId, - UserId, + UserId, ViewId, }; use risingwave_pb::catalog::subscription::SubscriptionState; use risingwave_pb::catalog::table::PbTableType; @@ -2428,6 +2428,19 @@ impl CatalogController { Ok(table_ids) } + pub async fn list_view_ids(&self, schema_id: SchemaId) -> MetaResult> { + let inner = self.inner.read().await; + let view_ids: Vec = View::find() + .select_only() + .column(view::Column::ViewId) + .join(JoinType::InnerJoin, view::Relation::Object.def()) + .filter(object::Column::SchemaId.eq(schema_id)) + .into_tuple() + .all(&inner.db) + .await?; + Ok(view_ids) + } + pub async fn list_tables_by_type(&self, table_type: TableType) -> MetaResult> { let inner = self.inner.read().await; let table_objs = Table::find() diff --git a/src/meta/src/controller/utils.rs b/src/meta/src/controller/utils.rs index 42ccc97e6637f..7d8f4769e8264 100644 --- a/src/meta/src/controller/utils.rs +++ b/src/meta/src/controller/utils.rs @@ -754,12 +754,11 @@ where let obj = match object.obj_type { ObjectType::Database => PbObject::DatabaseId(oid), ObjectType::Schema => PbObject::SchemaId(oid), - ObjectType::Table => PbObject::TableId(oid), + ObjectType::Table | ObjectType::Index => PbObject::TableId(oid), ObjectType::Source => PbObject::SourceId(oid), ObjectType::Sink => PbObject::SinkId(oid), ObjectType::View => PbObject::ViewId(oid), ObjectType::Function => PbObject::FunctionId(oid), - ObjectType::Index => unreachable!("index is not supported yet"), ObjectType::Connection => unreachable!("connection is not supported yet"), ObjectType::Subscription => PbObject::SubscriptionId(oid), }; diff --git a/src/meta/src/manager/catalog/database.rs b/src/meta/src/manager/catalog/database.rs index 1cf466caddde5..5b1b24e82de21 100644 --- a/src/meta/src/manager/catalog/database.rs +++ b/src/meta/src/manager/catalog/database.rs @@ -363,6 +363,14 @@ impl DatabaseManager { .collect_vec() } + pub fn list_view_ids(&self, schema_id: SchemaId) -> Vec { + self.views + .values() + .filter(|view| view.schema_id == schema_id) + .map(|view| view.id) + .collect_vec() + } + pub fn list_sources(&self) -> Vec { self.sources.values().cloned().collect_vec() } diff --git a/src/meta/src/manager/catalog/mod.rs b/src/meta/src/manager/catalog/mod.rs index 4eb2970fea858..ab452dfe05b21 100644 --- a/src/meta/src/manager/catalog/mod.rs +++ b/src/meta/src/manager/catalog/mod.rs @@ -3608,6 +3608,10 @@ impl CatalogManager { .list_dml_table_ids(schema_id) } + pub async fn list_view_ids(&self, schema_id: SchemaId) -> Vec { + self.core.lock().await.database.list_view_ids(schema_id) + } + pub async fn list_sources(&self) -> Vec { self.core.lock().await.database.list_sources() } From 5d28913c4b09f7eae892cf96e244c8a5694a1147 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 14 May 2024 10:09:56 +0800 Subject: [PATCH 03/11] chore(deps): Bump Npgsql from 8.0.2 to 8.0.3 in /integration_tests/client-library/csharp (#16668) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- integration_tests/client-library/csharp/csharp.csproj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integration_tests/client-library/csharp/csharp.csproj b/integration_tests/client-library/csharp/csharp.csproj index 2d7ae2b7e2f48..f57b4028aabe9 100644 --- a/integration_tests/client-library/csharp/csharp.csproj +++ b/integration_tests/client-library/csharp/csharp.csproj @@ -12,7 +12,7 @@ - + From 571ee28109ea3a5d729b4f830c60d498894bf4ba Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 14 May 2024 10:29:22 +0800 Subject: [PATCH 04/11] chore(deps): Bump rust-embed from 8.3.0 to 8.4.0 (#16735) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Cargo.lock | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 911f5484558ee..09ed32284fc83 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1848,7 +1848,7 @@ dependencies = [ "bitflags 2.5.0", "cexpr", "clang-sys", - "itertools 0.10.5", + "itertools 0.12.1", "lazy_static", "lazycell", "log", @@ -9322,7 +9322,7 @@ dependencies = [ "indoc", "libc", "memoffset", - "parking_lot 0.11.2", + "parking_lot 0.12.1", "portable-atomic", "pyo3-build-config", "pyo3-ffi", @@ -11731,9 +11731,9 @@ dependencies = [ [[package]] name = "rust-embed" -version = "8.3.0" +version = "8.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb78f46d0066053d16d4ca7b898e9343bc3530f71c61d5ad84cd404ada068745" +checksum = "19549741604902eb99a7ed0ee177a0663ee1eda51a29f71401f166e47e77806a" dependencies = [ "rust-embed-impl", "rust-embed-utils", @@ -11742,9 +11742,9 @@ dependencies = [ [[package]] name = "rust-embed-impl" -version = "8.3.0" +version = "8.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b91ac2a3c6c0520a3fb3dd89321177c3c692937c4eb21893378219da10c44fc8" +checksum = "cb9f96e283ec64401f30d3df8ee2aaeb2561f34c824381efa24a35f79bf40ee4" dependencies = [ "proc-macro2", "quote", @@ -11756,9 +11756,9 @@ dependencies = [ [[package]] name = "rust-embed-utils" -version = "8.3.0" +version = "8.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86f69089032567ffff4eada41c573fc43ff466c7db7c5688b2e7969584345581" +checksum = "38c74a686185620830701348de757fd36bef4aa9680fd23c49fc539ddcc1af32" dependencies = [ "mime_guess", "sha2", From b7006a51f572f9efeacd0e2c7c29d210944d2ce8 Mon Sep 17 00:00:00 2001 From: stonepage <40830455+st1page@users.noreply.github.com> Date: Tue, 14 May 2024 12:52:11 +0800 Subject: [PATCH 05/11] refactor: bind create source (#16665) --- src/frontend/src/handler/create_source.rs | 216 +++++++++++------- src/frontend/src/handler/create_table.rs | 235 ++++++++------------ src/frontend/src/handler/create_table_as.rs | 6 +- 3 files changed, 234 insertions(+), 223 deletions(-) diff --git a/src/frontend/src/handler/create_source.rs b/src/frontend/src/handler/create_source.rs index 84d22631a454e..4ead8066a2736 100644 --- a/src/frontend/src/handler/create_source.rs +++ b/src/frontend/src/handler/create_source.rs @@ -54,15 +54,13 @@ use risingwave_connector::source::{ POSIX_FS_CONNECTOR, PULSAR_CONNECTOR, S3_CONNECTOR, }; use risingwave_connector::WithPropertiesExt; -use risingwave_pb::catalog::{ - PbSchemaRegistryNameStrategy, PbSource, StreamSourceInfo, WatermarkDesc, -}; +use risingwave_pb::catalog::{PbSchemaRegistryNameStrategy, StreamSourceInfo, WatermarkDesc}; use risingwave_pb::plan_common::additional_column::ColumnType as AdditionalColumnType; use risingwave_pb::plan_common::{EncodeType, FormatType}; use risingwave_pb::stream_plan::stream_fragment_graph::Parallelism; use risingwave_sqlparser::ast::{ get_delimiter, AstString, ColumnDef, ConnectorSchema, CreateSourceStatement, Encode, Format, - ProtobufSchema, SourceWatermark, + ObjectName, ProtobufSchema, SourceWatermark, TableConstraint, }; use risingwave_sqlparser::parser::IncludeOption; use thiserror_ext::AsReport; @@ -70,6 +68,7 @@ use thiserror_ext::AsReport; use super::RwPgResponse; use crate::binder::Binder; use crate::catalog::source_catalog::SourceCatalog; +use crate::catalog::{DatabaseId, SchemaId}; use crate::error::ErrorCode::{self, Deprecated, InvalidInputSyntax, NotSupported, ProtocolError}; use crate::error::{Result, RwError}; use crate::expr::Expr; @@ -1320,63 +1319,56 @@ pub fn bind_connector_props( } Ok(with_properties) } - -pub async fn handle_create_source( +#[allow(clippy::too_many_arguments)] +pub async fn bind_create_source( handler_args: HandlerArgs, - stmt: CreateSourceStatement, -) -> Result { - let session = handler_args.session.clone(); - - if let Either::Right(resp) = session.check_relation_name_duplicated( - stmt.source_name.clone(), - StatementType::CREATE_SOURCE, - stmt.if_not_exists, - )? { - return Ok(resp); - } - - let db_name = session.database(); - let (schema_name, name) = Binder::resolve_schema_qualified_name(db_name, stmt.source_name)?; + full_name: ObjectName, + source_schema: ConnectorSchema, + with_properties: HashMap, + sql_columns_defs: &[ColumnDef], + constraints: Vec, + wildcard_idx: Option, + source_watermarks: Vec, + columns_from_resolve_source: Option>, + source_info: StreamSourceInfo, + include_column_options: IncludeOption, + col_id_gen: &mut ColumnIdGenerator, + // `true` for "create source", `false` for "create table with connector" + is_create_source: bool, +) -> Result<(SourceCatalog, DatabaseId, SchemaId)> { + let session = &handler_args.session; + let db_name: &str = session.database(); + let (schema_name, source_name) = Binder::resolve_schema_qualified_name(db_name, full_name)?; let (database_id, schema_id) = session.get_database_and_schema_id_for_create(schema_name.clone())?; - if handler_args.with_options.is_empty() { - return Err(RwError::from(InvalidInputSyntax( - "missing WITH clause".to_string(), - ))); + if !is_create_source && with_properties.is_iceberg_connector() { + return Err( + ErrorCode::BindError("can't create table with iceberg connector".to_string()).into(), + ); } - let source_schema = stmt.source_schema.into_v2_with_warning(); - - let with_properties = bind_connector_props(&handler_args, &source_schema, true)?; - ensure_table_constraints_supported(&stmt.constraints)?; - let sql_pk_names = bind_sql_pk_names(&stmt.columns, &stmt.constraints)?; - - let create_cdc_source_job = with_properties.is_shareable_cdc_connector(); - let is_shared = create_cdc_source_job - || (with_properties.is_kafka_connector() && session.config().rw_enable_shared_source()); + ensure_table_constraints_supported(&constraints)?; + let sql_pk_names = bind_sql_pk_names(sql_columns_defs, &constraints)?; - let (columns_from_resolve_source, mut source_info) = if create_cdc_source_job { - bind_columns_from_source_for_cdc(&session, &source_schema, &with_properties)? - } else { - bind_columns_from_source(&session, &source_schema, &with_properties).await? - }; - if is_shared { - // Note: this field should be called is_shared. Check field doc for more details. - source_info.cdc_source_job = true; - source_info.is_distributed = !create_cdc_source_job; - } - let columns_from_sql = bind_sql_columns(&stmt.columns)?; + let columns_from_sql = bind_sql_columns(sql_columns_defs)?; let mut columns = bind_all_columns( &source_schema, columns_from_resolve_source, columns_from_sql, - &stmt.columns, - stmt.wildcard_idx, + sql_columns_defs, + wildcard_idx, )?; + // add additional columns before bind pk, because `format upsert` requires the key column - handle_addition_columns(&with_properties, stmt.include_column_options, &mut columns)?; + handle_addition_columns(&with_properties, include_column_options, &mut columns)?; + // compatible with the behavior that add a hidden column `_rw_kafka_timestamp` to each message from Kafka source + if is_create_source { + // must behind `handle_addition_columns` + check_and_add_timestamp_column(&with_properties, &mut columns); + } + let pk_names = bind_source_pk( &source_schema, &source_info, @@ -1386,70 +1378,128 @@ pub async fn handle_create_source( ) .await?; - // must behind `handle_addition_columns` - check_and_add_timestamp_column(&with_properties, &mut columns); - - let mut col_id_gen = ColumnIdGenerator::new_initial(); - for c in &mut columns { - c.column_desc.column_id = col_id_gen.generate(c.name()) - } - - if !pk_names.is_empty() { + if is_create_source && !pk_names.is_empty() { return Err(ErrorCode::InvalidInputSyntax( "Source does not support PRIMARY KEY constraint, please use \"CREATE TABLE\" instead" .to_owned(), ) .into()); } - let (mut columns, pk_column_ids, row_id_index) = - bind_pk_on_relation(columns, pk_names, with_properties.connector_need_pk())?; + for c in &mut columns { + c.column_desc.column_id = col_id_gen.generate(c.name()) + } debug_assert!(is_column_ids_dedup(&columns)); + let (mut columns, pk_col_ids, row_id_index) = bind_pk_on_relation(columns, pk_names, true)?; + let watermark_descs = - bind_source_watermark(&session, name.clone(), stmt.source_watermarks, &columns)?; + bind_source_watermark(session, source_name.clone(), source_watermarks, &columns)?; // TODO(yuhao): allow multiple watermark on source. assert!(watermark_descs.len() <= 1); bind_sql_column_constraints( - &session, - name.clone(), + session, + source_name.clone(), &mut columns, - stmt.columns, - &pk_column_ids, + // TODO(st1page): pass the ref + sql_columns_defs.to_vec(), + &pk_col_ids, )?; - check_source_schema(&with_properties, row_id_index, &columns).await?; - let pk_column_ids = pk_column_ids.into_iter().map(Into::into).collect(); - - let mut with_options = WithOptions::new(with_properties); - // resolve privatelink connection for Kafka source + // resolve privatelink connection for Kafka + let mut with_properties = WithOptions::new(with_properties); let connection_id = - resolve_privatelink_in_with_option(&mut with_options, &schema_name, &session)?; - let definition = handler_args.normalized_sql.clone(); + resolve_privatelink_in_with_option(&mut with_properties, &schema_name, session)?; + + let definition: String = handler_args.normalized_sql.clone(); - let source = PbSource { + let associated_table_id = if is_create_source { + None + } else { + Some(TableId::placeholder()) + }; + let source = SourceCatalog { id: TableId::placeholder().table_id, - schema_id, - database_id, - name, - row_id_index: row_id_index.map(|idx| idx as u32), - columns: columns.iter().map(|c| c.to_protobuf()).collect_vec(), - pk_column_ids, - with_properties: with_options.into_inner().into_iter().collect(), - info: Some(source_info), + name: source_name, + columns, + pk_col_ids, + append_only: row_id_index.is_some(), owner: session.user_id(), + info: source_info, + row_id_index, + with_properties: with_properties.into_inner().into_iter().collect(), watermark_descs, + associated_table_id, definition, connection_id, - initialized_at_epoch: None, created_at_epoch: None, - optional_associated_table_id: None, + initialized_at_epoch: None, version: INITIAL_SOURCE_VERSION_ID, - initialized_at_cluster_version: None, created_at_cluster_version: None, + initialized_at_cluster_version: None, + }; + Ok((source, database_id, schema_id)) +} + +pub async fn handle_create_source( + handler_args: HandlerArgs, + stmt: CreateSourceStatement, +) -> Result { + let session = handler_args.session.clone(); + + if let Either::Right(resp) = session.check_relation_name_duplicated( + stmt.source_name.clone(), + StatementType::CREATE_SOURCE, + stmt.if_not_exists, + )? { + return Ok(resp); + } + + if handler_args.with_options.is_empty() { + return Err(RwError::from(InvalidInputSyntax( + "missing WITH clause".to_string(), + ))); + } + + let source_schema = stmt.source_schema.into_v2_with_warning(); + let with_properties = bind_connector_props(&handler_args, &source_schema, true)?; + + let create_cdc_source_job = with_properties.is_shareable_cdc_connector(); + let is_shared = create_cdc_source_job + || (with_properties.is_kafka_connector() && session.config().rw_enable_shared_source()); + + let (columns_from_resolve_source, mut source_info) = if create_cdc_source_job { + bind_columns_from_source_for_cdc(&session, &source_schema, &with_properties)? + } else { + bind_columns_from_source(&session, &source_schema, &with_properties).await? }; + if is_shared { + // Note: this field should be called is_shared. Check field doc for more details. + source_info.cdc_source_job = true; + source_info.is_distributed = !create_cdc_source_job; + } + let mut col_id_gen = ColumnIdGenerator::new_initial(); + + let (source_catalog, database_id, schema_id) = bind_create_source( + handler_args.clone(), + stmt.source_name, + source_schema, + with_properties, + &stmt.columns, + stmt.constraints, + stmt.wildcard_idx, + stmt.source_watermarks, + columns_from_resolve_source, + source_info, + stmt.include_column_options, + &mut col_id_gen, + true, + ) + .await?; + + let source = source_catalog.to_prost(schema_id, database_id); let catalog_writer = session.catalog_writer()?; @@ -1457,7 +1507,7 @@ pub async fn handle_create_source( let graph = { let context = OptimizerContext::from_handler_args(handler_args); let source_node = LogicalSource::with_catalog( - Rc::new(SourceCatalog::from(&source)), + Rc::new(source_catalog), SourceNodeKind::CreateSharedSource, context.into(), None, diff --git a/src/frontend/src/handler/create_table.rs b/src/frontend/src/handler/create_table.rs index 76608c40d068a..3689d30d3793f 100644 --- a/src/frontend/src/handler/create_table.rs +++ b/src/frontend/src/handler/create_table.rs @@ -24,16 +24,15 @@ use pgwire::pg_response::{PgResponse, StatementType}; use risingwave_common::bail_not_implemented; use risingwave_common::catalog::{ CdcTableDesc, ColumnCatalog, ColumnDesc, TableId, TableVersionId, DEFAULT_SCHEMA_NAME, - INITIAL_SOURCE_VERSION_ID, INITIAL_TABLE_VERSION_ID, USER_COLUMN_ID_OFFSET, + INITIAL_TABLE_VERSION_ID, USER_COLUMN_ID_OFFSET, }; use risingwave_common::util::sort_util::{ColumnOrder, OrderType}; use risingwave_common::util::value_encoding::DatumToProtoExt; +use risingwave_connector::source; use risingwave_connector::source::cdc::external::{ DATABASE_NAME_KEY, SCHEMA_NAME_KEY, TABLE_NAME_KEY, }; -use risingwave_connector::{source, WithPropertiesExt}; -use risingwave_pb::catalog::source::OptionalAssociatedTableId; -use risingwave_pb::catalog::{PbSource, PbTable, StreamSourceInfo, Table, WatermarkDesc}; +use risingwave_pb::catalog::{PbSource, PbTable, Table, WatermarkDesc}; use risingwave_pb::ddl_service::TableJobType; use risingwave_pb::plan_common::column_desc::GeneratedOrDefaultColumn; use risingwave_pb::plan_common::{ @@ -52,12 +51,12 @@ use crate::binder::{bind_data_type, bind_struct_field, Clause}; use crate::catalog::root_catalog::SchemaPath; use crate::catalog::source_catalog::SourceCatalog; use crate::catalog::table_catalog::TableVersion; -use crate::catalog::{check_valid_column_name, ColumnId}; +use crate::catalog::{check_valid_column_name, ColumnId, DatabaseId, SchemaId}; use crate::error::{ErrorCode, Result, RwError}; use crate::expr::{Expr, ExprImpl, ExprRewriter, InlineNowProcTime}; use crate::handler::create_source::{ - bind_all_columns, bind_columns_from_source, bind_connector_props, bind_source_pk, - bind_source_watermark, check_source_schema, handle_addition_columns, UPSTREAM_SOURCE_KEY, + bind_columns_from_source, bind_connector_props, bind_create_source, bind_source_watermark, + UPSTREAM_SOURCE_KEY, }; use crate::handler::HandlerArgs; use crate::optimizer::plan_node::generic::{CdcScanOptions, SourceNodeKind}; @@ -66,7 +65,6 @@ use crate::optimizer::property::{Order, RequiredDist}; use crate::optimizer::{OptimizerContext, OptimizerContextRef, PlanRef, PlanRoot}; use crate::session::SessionImpl; use crate::stream_fragmenter::build_graph; -use crate::utils::resolve_privatelink_in_with_option; use crate::{Binder, TableCatalog, WithOptions}; /// Column ID generator for a new table or a new version of an existing table to alter. @@ -482,82 +480,42 @@ pub(crate) async fn gen_create_table_plan_with_source( let session = &handler_args.session; let with_properties = bind_connector_props(&handler_args, &source_schema, false)?; - ensure_table_constraints_supported(&constraints)?; - - let sql_pk_names = bind_sql_pk_names(&column_defs, &constraints)?; - let (columns_from_resolve_source, source_info) = bind_columns_from_source(session, &source_schema, &with_properties).await?; - let columns_from_sql = bind_sql_columns(&column_defs)?; - let mut columns = bind_all_columns( - &source_schema, - columns_from_resolve_source, - columns_from_sql, + let (source_catalog, database_id, schema_id) = bind_create_source( + handler_args.clone(), + table_name, + source_schema, + with_properties, &column_defs, + constraints, wildcard_idx, - )?; - - // add additional columns before bind pk, because `format upsert` requires the key column - handle_addition_columns(&with_properties, include_column_options, &mut columns)?; - let pk_names = bind_source_pk( - &source_schema, - &source_info, - &mut columns, - sql_pk_names, - &with_properties, + source_watermarks, + columns_from_resolve_source, + source_info, + include_column_options, + &mut col_id_gen, + false, ) .await?; - for c in &mut columns { - c.column_desc.column_id = col_id_gen.generate(c.name()) - } - - if with_properties.is_iceberg_connector() { - return Err( - ErrorCode::BindError("can't create table with iceberg connector".to_string()).into(), - ); - } - let (mut columns, pk_column_ids, row_id_index) = bind_pk_on_relation(columns, pk_names, true)?; - - let watermark_descs = bind_source_watermark( - session, - table_name.real_value(), - source_watermarks, - &columns, - )?; - // TODO(yuhao): allow multiple watermark on source. - assert!(watermark_descs.len() <= 1); - - let definition = handler_args.normalized_sql.clone(); - - bind_sql_column_constraints( - session, - table_name.real_value(), - &mut columns, - column_defs, - &pk_column_ids, - )?; - - check_source_schema(&with_properties, row_id_index, &columns).await?; + let pb_source = source_catalog.to_prost(schema_id, database_id); let context = OptimizerContext::new(handler_args, explain_options); - gen_table_plan_inner( + let (plan, table) = gen_table_plan_with_source( context.into(), - table_name, - columns, - with_properties, - pk_column_ids, - row_id_index, - Some(source_info), - definition, - watermark_descs, + source_catalog, append_only, on_conflict, with_version_column, Some(col_id_gen.into_version()), - ) + database_id, + schema_id, + )?; + + Ok((plan, Some(pb_source), table)) } /// `gen_create_table_plan` generates the plan for creating a table without an external stream @@ -572,14 +530,14 @@ pub(crate) fn gen_create_table_plan( append_only: bool, on_conflict: Option, with_version_column: Option, -) -> Result<(PlanRef, Option, PbTable)> { +) -> Result<(PlanRef, PbTable)> { let definition = context.normalized_sql().to_owned(); let mut columns = bind_sql_columns(&column_defs)?; for c in &mut columns { c.column_desc.column_id = col_id_gen.generate(c.name()) } let with_properties = context.with_options().inner().clone().into_iter().collect(); - gen_create_table_plan_without_bind( + gen_create_table_plan_without_source( context, table_name, columns, @@ -596,7 +554,7 @@ pub(crate) fn gen_create_table_plan( } #[allow(clippy::too_many_arguments)] -pub(crate) fn gen_create_table_plan_without_bind( +pub(crate) fn gen_create_table_plan_without_source( context: OptimizerContext, table_name: ObjectName, columns: Vec, @@ -609,12 +567,12 @@ pub(crate) fn gen_create_table_plan_without_bind( on_conflict: Option, with_version_column: Option, version: Option, -) -> Result<(PlanRef, Option, PbTable)> { +) -> Result<(PlanRef, PbTable)> { ensure_table_constraints_supported(&constraints)?; let pk_names = bind_sql_pk_names(&column_defs, &constraints)?; let (mut columns, pk_column_ids, row_id_index) = bind_pk_on_relation(columns, pk_names, true)?; - let watermark_descs = bind_source_watermark( + let watermark_descs: Vec = bind_source_watermark( context.session_ctx(), table_name.real_value(), source_watermarks, @@ -628,33 +586,71 @@ pub(crate) fn gen_create_table_plan_without_bind( column_defs, &pk_column_ids, )?; + let session = context.session_ctx().clone(); + + let db_name = session.database(); + let (schema_name, name) = Binder::resolve_schema_qualified_name(db_name, table_name)?; + let (database_id, schema_id) = + session.get_database_and_schema_id_for_create(schema_name.clone())?; gen_table_plan_inner( context.into(), - table_name, + name, columns, with_properties, pk_column_ids, row_id_index, - None, definition, watermark_descs, append_only, on_conflict, with_version_column, version, + None, + database_id, + schema_id, + ) +} + +fn gen_table_plan_with_source( + context: OptimizerContextRef, + source_catalog: SourceCatalog, + append_only: bool, + on_conflict: Option, + with_version_column: Option, + version: Option, /* TODO: this should always be `Some` if we support `ALTER + * TABLE` for `CREATE TABLE AS`. */ + database_id: DatabaseId, + schema_id: SchemaId, +) -> Result<(PlanRef, PbTable)> { + let cloned_source_catalog = source_catalog.clone(); + gen_table_plan_inner( + context, + source_catalog.name, + source_catalog.columns, + source_catalog.with_properties.clone().into_iter().collect(), + source_catalog.pk_col_ids, + source_catalog.row_id_index, + source_catalog.definition, + source_catalog.watermark_descs, + append_only, + on_conflict, + with_version_column, + version, + Some(cloned_source_catalog), + database_id, + schema_id, ) } #[allow(clippy::too_many_arguments)] fn gen_table_plan_inner( context: OptimizerContextRef, - table_name: ObjectName, + table_name: String, columns: Vec, with_properties: HashMap, pk_column_ids: Vec, row_id_index: Option, - source_info: Option, definition: String, watermark_descs: Vec, append_only: bool, @@ -662,51 +658,16 @@ fn gen_table_plan_inner( with_version_column: Option, version: Option, /* TODO: this should always be `Some` if we support `ALTER * TABLE` for `CREATE TABLE AS`. */ -) -> Result<(PlanRef, Option, PbTable)> { + source_catalog: Option, + database_id: DatabaseId, + schema_id: SchemaId, +) -> Result<(PlanRef, PbTable)> { let session = context.session_ctx().clone(); - let db_name = session.database(); - let (schema_name, name) = Binder::resolve_schema_qualified_name(db_name, table_name)?; - let (database_id, schema_id) = - session.get_database_and_schema_id_for_create(schema_name.clone())?; - - // resolve privatelink connection for Table backed by Kafka source - let mut with_properties = WithOptions::new(with_properties); - let connection_id = - resolve_privatelink_in_with_option(&mut with_properties, &schema_name, &session)?; + let with_properties = WithOptions::new(with_properties); let retention_seconds = with_properties.retention_seconds(); - - let is_external_source = source_info.is_some(); - - let source = source_info.map(|source_info| PbSource { - id: TableId::placeholder().table_id, - schema_id, - database_id, - name: name.clone(), - row_id_index: row_id_index.map(|i| i as _), - columns: columns - .iter() - .map(|column| column.to_protobuf()) - .collect_vec(), - pk_column_ids: pk_column_ids.iter().map(Into::into).collect_vec(), - with_properties: with_properties.into_inner().into_iter().collect(), - info: Some(source_info), - owner: session.user_id(), - watermark_descs: watermark_descs.clone(), - definition: "".to_string(), - connection_id, - initialized_at_epoch: None, - created_at_epoch: None, - optional_associated_table_id: Some(OptionalAssociatedTableId::AssociatedTableId( - TableId::placeholder().table_id, - )), - version: INITIAL_SOURCE_VERSION_ID, - initialized_at_cluster_version: None, - created_at_cluster_version: None, - }); - - let source_catalog = source.as_ref().map(|source| Rc::new((source).into())); + let is_external_source = source_catalog.is_some(); let source_node: PlanRef = LogicalSource::new( - source_catalog.clone(), + source_catalog.map(|source| Rc::new(source.clone())), columns.clone(), row_id_index, SourceNodeKind::CreateTable, @@ -749,7 +710,7 @@ fn gen_table_plan_inner( let materialize = plan_root.gen_table_plan( context, - name, + table_name, columns, definition, pk_column_ids, @@ -766,7 +727,7 @@ fn gen_table_plan_inner( let mut table = materialize.table().to_prost(schema_id, database_id); table.owner = session.user_id(); - Ok((materialize.into(), source, table)) + Ok((materialize.into(), table)) } #[allow(clippy::too_many_arguments)] @@ -976,20 +937,19 @@ pub(super) async fn handle_create_table_plan( ), (None, None) => { let context = OptimizerContext::new(handler_args, explain_options); - ( - gen_create_table_plan( - context, - table_name.clone(), - column_defs, - constraints, - col_id_gen, - source_watermarks, - append_only, - on_conflict, - with_version_column, - )?, - TableJobType::General, - ) + let (plan, table) = gen_create_table_plan( + context, + table_name.clone(), + column_defs, + constraints, + col_id_gen, + source_watermarks, + append_only, + on_conflict, + with_version_column, + )?; + + ((plan, None, table), TableJobType::General) } (None, Some(cdc_table)) => { @@ -1186,7 +1146,7 @@ pub async fn generate_stream_graph_for_table( } None => { let context = OptimizerContext::from_handler_args(handler_args); - gen_create_table_plan( + let (plan, table) = gen_create_table_plan( context, table_name, columns, @@ -1196,7 +1156,8 @@ pub async fn generate_stream_graph_for_table( append_only, on_conflict, with_version_column, - )? + )?; + (plan, None, table) } }; diff --git a/src/frontend/src/handler/create_table_as.rs b/src/frontend/src/handler/create_table_as.rs index a828cb9708b48..bdd6e9d078e90 100644 --- a/src/frontend/src/handler/create_table_as.rs +++ b/src/frontend/src/handler/create_table_as.rs @@ -22,7 +22,7 @@ use risingwave_sqlparser::ast::{ColumnDef, ObjectName, OnConflict, Query, Statem use super::{HandlerArgs, RwPgResponse}; use crate::binder::BoundStatement; use crate::error::{ErrorCode, Result}; -use crate::handler::create_table::{gen_create_table_plan_without_bind, ColumnIdGenerator}; +use crate::handler::create_table::{gen_create_table_plan_without_source, ColumnIdGenerator}; use crate::handler::query::handle_query; use crate::{build_graph, Binder, OptimizerContext}; pub async fn handle_create_as( @@ -96,7 +96,7 @@ pub async fn handle_create_as( .clone() .into_iter() .collect(); - let (plan, source, table) = gen_create_table_plan_without_bind( + let (plan, table) = gen_create_table_plan_without_source( context, table_name.clone(), columns, @@ -118,7 +118,7 @@ pub async fn handle_create_as( .map(|parallelism| Parallelism { parallelism: parallelism.get(), }); - (graph, source, table) + (graph, None, table) }; tracing::trace!( From 7419439ff721e737a706d3a004e5e0084e10d391 Mon Sep 17 00:00:00 2001 From: xxchan Date: Tue, 14 May 2024 14:14:46 +0800 Subject: [PATCH 06/11] feat(source): let shared SourceExecutor start from latest instead of specified offset (#16626) Signed-off-by: xxchan --- .../source_inline/kafka/shared_source.slt | 43 +++++++++++-------- .../src/source/kafka/enumerator/client.rs | 2 + .../src/source/kafka/source/reader.rs | 4 +- src/connector/src/source/kafka/split.rs | 12 ++++++ .../src/executor/source/source_executor.rs | 18 ++++++++ 5 files changed, 59 insertions(+), 20 deletions(-) diff --git a/e2e_test/source_inline/kafka/shared_source.slt b/e2e_test/source_inline/kafka/shared_source.slt index 57ab0b95de9b5..f180e6e0d8351 100644 --- a/e2e_test/source_inline/kafka/shared_source.slt +++ b/e2e_test/source_inline/kafka/shared_source.slt @@ -28,7 +28,7 @@ select count(*) from rw_internal_tables where name like '%s0%'; sleep 1s -# Ingestion does not start (state table is empty), even after sleep +# SourceExecutor's ingestion does not start (state table is empty), even after sleep system ok internal_table.mjs --name s0 --type source ---- @@ -41,28 +41,21 @@ create materialized view mv_1 as select * from s0; # Wait enough time to ensure SourceExecutor consumes all Kafka data. sleep 2s -# Ingestion started +# SourceExecutor's ingestion started, but it only starts from latest. system ok internal_table.mjs --name s0 --type source ---- -0,"{""split_info"": {""partition"": 0, ""start_offset"": 0, ""stop_offset"": null, ""topic"": ""shared_source""}, ""split_type"": ""kafka""}" -1,"{""split_info"": {""partition"": 1, ""start_offset"": 0, ""stop_offset"": null, ""topic"": ""shared_source""}, ""split_type"": ""kafka""}" -2,"{""split_info"": {""partition"": 2, ""start_offset"": 0, ""stop_offset"": null, ""topic"": ""shared_source""}, ""split_type"": ""kafka""}" -3,"{""split_info"": {""partition"": 3, ""start_offset"": 0, ""stop_offset"": null, ""topic"": ""shared_source""}, ""split_type"": ""kafka""}" - +(empty) -# The result is non-deterministic: -# If the upstream row comes before the backfill row, it will be ignored, and the result state is Backfilling. -# If the upstream row comes after the backfill row, the result state is Finished. -# Uncomment below and run manually to see the result. -# system ok -# internal_table.mjs --name mv_1 --type sourcebackfill -# ---- -# 0,"{""Backfilling"": ""0""}" -# 1,"{""Backfilling"": ""0""}" -# 2,"{""Backfilling"": ""0""}" -# 3,"{""Backfilling"": ""0""}" +# offset 0 must be backfilled, not from upstream. +system ok +internal_table.mjs --name mv_1 --type sourcebackfill +---- +0,"{""Backfilling"": ""0""}" +1,"{""Backfilling"": ""0""}" +2,"{""Backfilling"": ""0""}" +3,"{""Backfilling"": ""0""}" # This does not affect the behavior for CREATE MATERIALIZED VIEW below. It also uses the shared source, and creates SourceBackfillExecutor. @@ -108,6 +101,16 @@ EOF sleep 2s +# SourceExecutor's finally got new data now. +system ok +internal_table.mjs --name s0 --type source +---- +0,"{""split_info"": {""partition"": 0, ""start_offset"": 1, ""stop_offset"": null, ""topic"": ""shared_source""}, ""split_type"": ""kafka""}" +1,"{""split_info"": {""partition"": 1, ""start_offset"": 1, ""stop_offset"": null, ""topic"": ""shared_source""}, ""split_type"": ""kafka""}" +2,"{""split_info"": {""partition"": 2, ""start_offset"": 1, ""stop_offset"": null, ""topic"": ""shared_source""}, ""split_type"": ""kafka""}" +3,"{""split_info"": {""partition"": 3, ""start_offset"": 1, ""stop_offset"": null, ""topic"": ""shared_source""}, ""split_type"": ""kafka""}" + + query IT rowsort select v1, v2 from s0; ---- @@ -143,7 +146,9 @@ internal_table.mjs --name s0 --type source 3,"{""split_info"": {""partition"": 3, ""start_offset"": 1, ""stop_offset"": null, ""topic"": ""shared_source""}, ""split_type"": ""kafka""}" -# Same as above, the result is still non-deterministic: Some partitions may be: "{""Backfilling"": ""1""}" +# The result is non-deterministic: +# If the upstream row comes before the backfill row, it will be ignored, and the result state is "{""Backfilling"": ""1""}". +# If the upstream row comes after the backfill row, the result state is Finished. # Uncomment below and run manually to see the result. # system ok diff --git a/src/connector/src/source/kafka/enumerator/client.rs b/src/connector/src/source/kafka/enumerator/client.rs index 40b952f08216c..d4dddad67bfdf 100644 --- a/src/connector/src/source/kafka/enumerator/client.rs +++ b/src/connector/src/source/kafka/enumerator/client.rs @@ -151,6 +151,7 @@ impl SplitEnumerator for KafkaSplitEnumerator { partition, start_offset: start_offsets.remove(&partition).unwrap(), stop_offset: stop_offsets.remove(&partition).unwrap(), + hack_seek_to_latest: false, }) .collect(); @@ -254,6 +255,7 @@ impl KafkaSplitEnumerator { partition: *partition, start_offset: Some(start_offset), stop_offset: Some(stop_offset), + hack_seek_to_latest:false } }) .collect::>()) diff --git a/src/connector/src/source/kafka/source/reader.rs b/src/connector/src/source/kafka/source/reader.rs index a28ecb67b0f98..e3c59611d1acc 100644 --- a/src/connector/src/source/kafka/source/reader.rs +++ b/src/connector/src/source/kafka/source/reader.rs @@ -106,7 +106,9 @@ impl SplitReader for KafkaSplitReader { for split in splits { offsets.insert(split.id(), (split.start_offset, split.stop_offset)); - if let Some(offset) = split.start_offset { + if split.hack_seek_to_latest { + tpl.add_partition_offset(split.topic.as_str(), split.partition, Offset::End)?; + } else if let Some(offset) = split.start_offset { tpl.add_partition_offset( split.topic.as_str(), split.partition, diff --git a/src/connector/src/source/kafka/split.rs b/src/connector/src/source/kafka/split.rs index a649a729c0397..7ad0d52619b4a 100644 --- a/src/connector/src/source/kafka/split.rs +++ b/src/connector/src/source/kafka/split.rs @@ -24,6 +24,12 @@ pub struct KafkaSplit { pub(crate) partition: i32, pub(crate) start_offset: Option, pub(crate) stop_offset: Option, + #[serde(skip)] + /// Used by shared source to hackily seek to the latest offset without fetching start offset first. + /// XXX: But why do we fetch low watermark for latest start offset..? + /// + /// When this is `true`, `start_offset` will be ignored. + pub(crate) hack_seek_to_latest: bool, } impl SplitMetaData for KafkaSplit { @@ -58,10 +64,16 @@ impl KafkaSplit { partition, start_offset, stop_offset, + hack_seek_to_latest: false, } } pub fn get_topic_and_partition(&self) -> (String, i32) { (self.topic.clone(), self.partition) } + + /// This should only be used for a fresh split, not persisted in state table yet. + pub fn seek_to_latest_offset(&mut self) { + self.hack_seek_to_latest = true; + } } diff --git a/src/stream/src/executor/source/source_executor.rs b/src/stream/src/executor/source/source_executor.rs index 455b6e0fc33e4..5295549ed0455 100644 --- a/src/stream/src/executor/source/source_executor.rs +++ b/src/stream/src/executor/source/source_executor.rs @@ -428,6 +428,24 @@ impl SourceExecutor { .await? { *ele = recover_state; + } else { + // This is a new split, not in state table. + if self.is_shared { + // For shared source, we start from latest and let the downstream SourceBackfillExecutors to read historical data. + // It's highly probable that the work of scanning historical data cannot be shared, + // so don't waste work on it. + // For more details, see https://github.com/risingwavelabs/risingwave/issues/16576#issuecomment-2095413297 + if ele.is_cdc_split() { + // shared CDC source already starts from latest. + continue; + } + match ele { + SplitImpl::Kafka(split) => { + split.seek_to_latest_offset(); + } + _ => unreachable!("only kafka source can be shared, got {:?}", ele), + } + } } } From 76278e96d4a59a728be0a367d07763ed8776b644 Mon Sep 17 00:00:00 2001 From: August Date: Tue, 14 May 2024 14:25:18 +0800 Subject: [PATCH 07/11] feat: make pg meta backend by default in docker compose (#16724) --- docker/docker-compose-distributed-etcd.yml | 379 +++++++++++++++++++++ docker/docker-compose-distributed.yml | 65 +--- docker/docker-compose-etcd.yml | 278 +++++++++++++++ docker/docker-compose-with-azblob.yml | 14 +- docker/docker-compose-with-gcs.yml | 14 +- docker/docker-compose-with-local-fs.yml | 14 +- docker/docker-compose-with-obs.yml | 14 +- docker/docker-compose-with-oss.yml | 14 +- docker/docker-compose-with-s3.yml | 14 +- docker/docker-compose.yml | 64 +--- 10 files changed, 734 insertions(+), 136 deletions(-) create mode 100644 docker/docker-compose-distributed-etcd.yml create mode 100644 docker/docker-compose-etcd.yml diff --git a/docker/docker-compose-distributed-etcd.yml b/docker/docker-compose-distributed-etcd.yml new file mode 100644 index 0000000000000..d0297a132b8fc --- /dev/null +++ b/docker/docker-compose-distributed-etcd.yml @@ -0,0 +1,379 @@ +--- +version: "3" +x-image: &image + image: ${RW_IMAGE:-risingwavelabs/risingwave:v1.8.2} +services: + compactor-0: + <<: *image + command: + - compactor-node + - "--listen-addr" + - "0.0.0.0:6660" + - "--advertise-addr" + - "compactor-0:6660" + - "--prometheus-listener-addr" + - "0.0.0.0:1260" + - "--meta-address" + - "http://meta-node-0:5690" + - "--config-path" + - /risingwave.toml + expose: + - "6660" + - "1260" + ports: [] + depends_on: + - meta-node-0 + # - minio-0 + volumes: + - "./risingwave.toml:/risingwave.toml" + environment: + RUST_BACKTRACE: "1" + # If ENABLE_TELEMETRY is not set, telemetry will start by default + ENABLE_TELEMETRY: ${ENABLE_TELEMETRY:-true} + container_name: compactor-0 + healthcheck: + test: + - CMD-SHELL + - bash -c 'printf \"GET / HTTP/1.1\n\n\" > /dev/tcp/127.0.0.1/6660; exit $$?;' + interval: 1s + timeout: 5s + retries: 5 + restart: always + deploy: + resources: + limits: + memory: 2G + reservations: + memory: 1G + compute-node-0: + <<: *image + command: + - compute-node + - "--listen-addr" + - "0.0.0.0:5688" + - "--advertise-addr" + - "compute-node-0:5688" + - "--prometheus-listener-addr" + - "0.0.0.0:1222" + - "--meta-address" + - "http://meta-node-0:5690" + - "--config-path" + - /risingwave.toml + expose: + - "5688" + - "1222" + ports: [] + depends_on: + - meta-node-0 + # - minio-0 + volumes: + - "./risingwave.toml:/risingwave.toml" + environment: + RUST_BACKTRACE: "1" + # If ENABLE_TELEMETRY is not set, telemetry will start by default + ENABLE_TELEMETRY: ${ENABLE_TELEMETRY:-true} + container_name: compute-node-0 + healthcheck: + test: + - CMD-SHELL + - bash -c 'printf \"GET / HTTP/1.1\n\n\" > /dev/tcp/127.0.0.1/5688; exit $$?;' + interval: 1s + timeout: 5s + retries: 5 + restart: always + deploy: + resources: + limits: + memory: 26G + reservations: + memory: 26G + etcd-0: + image: "quay.io/coreos/etcd:v3.5.10" + command: + - /usr/local/bin/etcd + - "--listen-client-urls" + - "http://0.0.0.0:2388" + - "--advertise-client-urls" + - "http://etcd-0:2388" + - "--listen-peer-urls" + - "http://0.0.0.0:2389" + - "--initial-advertise-peer-urls" + - "http://etcd-0:2389" + - "--listen-metrics-urls" + - "http://0.0.0.0:2379" + - "--name" + - risedev-meta + - "--max-txn-ops" + - "999999" + - "--max-request-bytes" + - "10485760" + - "--auto-compaction-mode" + - periodic + - "--auto-compaction-retention" + - 1m + - "--snapshot-count" + - "10000" + - "--data-dir" + - /etcd-data + expose: + - "2388" + ports: + - "2388:2388" + - "2389:2389" + depends_on: [] + volumes: + - "etcd-0:/etcd-data" + environment: {} + container_name: etcd-0 + healthcheck: + test: + - CMD + - etcdctl + - --endpoints=http://localhost:2388 + - endpoint + - health + interval: 1s + timeout: 5s + retries: 5 + restart: always + frontend-node-0: + <<: *image + command: + - frontend-node + - "--listen-addr" + - "0.0.0.0:4566" + - "--meta-addr" + - "http://meta-node-0:5690" + - "--advertise-addr" + - "frontend-node-0:4566" + - "--config-path" + - /risingwave.toml + - "--prometheus-listener-addr" + - "0.0.0.0:2222" + expose: + - "4566" + ports: + - "4566:4566" + depends_on: + - meta-node-0 + volumes: + - "./risingwave.toml:/risingwave.toml" + environment: + RUST_BACKTRACE: "1" + # If ENABLE_TELEMETRY is not set, telemetry will start by default + ENABLE_TELEMETRY: ${ENABLE_TELEMETRY:-true} + container_name: frontend-node-0 + healthcheck: + test: + - CMD-SHELL + - bash -c 'printf \"GET / HTTP/1.1\n\n\" > /dev/tcp/127.0.0.1/4566; exit $$?;' + interval: 1s + timeout: 5s + retries: 5 + restart: always + deploy: + resources: + limits: + memory: 2G + reservations: + memory: 1G + grafana-0: + image: "grafana/grafana-oss:latest" + command: [] + expose: + - "3001" + ports: + - "3001:3001" + depends_on: [] + volumes: + - "grafana-0:/var/lib/grafana" + - "./grafana.ini:/etc/grafana/grafana.ini" + - "./grafana-risedev-datasource.yml:/etc/grafana/provisioning/datasources/grafana-risedev-datasource.yml" + - "./grafana-risedev-dashboard.yml:/etc/grafana/provisioning/dashboards/grafana-risedev-dashboard.yml" + - "./dashboards:/dashboards" + environment: {} + container_name: grafana-0 + healthcheck: + test: + - CMD-SHELL + - bash -c 'printf \"GET / HTTP/1.1\n\n\" > /dev/tcp/127.0.0.1/3001; exit $$?;' + interval: 1s + timeout: 5s + retries: 5 + restart: always + meta-node-0: + <<: *image + command: + - meta-node + - "--listen-addr" + - "0.0.0.0:5690" + - "--advertise-addr" + - "meta-node-0:5690" + - "--dashboard-host" + - "0.0.0.0:5691" + - "--prometheus-host" + - "0.0.0.0:1250" + - "--prometheus-endpoint" + - "http://prometheus-0:9500" + - "--backend" + - etcd + - "--etcd-endpoints" + - "etcd-0:2388" + - "--state-store" + - "hummock+minio://hummockadmin:hummockadmin@minio-0:9301/hummock001" + - "--data-directory" + - "hummock_001" + - "--config-path" + - /risingwave.toml + expose: + - "5690" + - "1250" + - "5691" + ports: + - "5690:5690" + - "5691:5691" + depends_on: + - "etcd-0" + volumes: + - "./risingwave.toml:/risingwave.toml" + environment: + RUST_BACKTRACE: "1" + # If ENABLE_TELEMETRY is not set, telemetry will start by default + ENABLE_TELEMETRY: ${ENABLE_TELEMETRY:-true} + container_name: meta-node-0 + healthcheck: + test: + - CMD-SHELL + - bash -c 'printf \"GET / HTTP/1.1\n\n\" > /dev/tcp/127.0.0.1/5690; exit $$?;' + interval: 1s + timeout: 5s + retries: 5 + restart: always + deploy: + resources: + limits: + memory: 2G + reservations: + memory: 1G + minio-0: + image: "quay.io/minio/minio:latest" + command: + - server + - "--address" + - "0.0.0.0:9301" + - "--console-address" + - "0.0.0.0:9400" + - /data + expose: + - "9301" + - "9400" + ports: + - "9301:9301" + - "9400:9400" + depends_on: [] + volumes: + - "minio-0:/data" + entrypoint: " + + /bin/sh -c ' + + set -e + + mkdir -p \"/data/hummock001\" + + /usr/bin/docker-entrypoint.sh \"$$0\" \"$$@\" + + '" + environment: + MINIO_CI_CD: "1" + MINIO_PROMETHEUS_AUTH_TYPE: public + MINIO_PROMETHEUS_URL: "http://prometheus-0:9500" + MINIO_ROOT_PASSWORD: hummockadmin + MINIO_ROOT_USER: hummockadmin + MINIO_DOMAIN: "minio-0" + container_name: minio-0 + healthcheck: + test: + - CMD-SHELL + - bash -c 'printf \"GET / HTTP/1.1\n\n\" > /dev/tcp/127.0.0.1/9301; exit $$?;' + interval: 1s + timeout: 5s + retries: 5 + restart: always + prometheus-0: + image: "prom/prometheus:latest" + command: + - "--config.file=/etc/prometheus/prometheus.yml" + - "--storage.tsdb.path=/prometheus" + - "--web.console.libraries=/usr/share/prometheus/console_libraries" + - "--web.console.templates=/usr/share/prometheus/consoles" + - "--web.listen-address=0.0.0.0:9500" + - "--storage.tsdb.retention.time=30d" + expose: + - "9500" + ports: + - "9500:9500" + depends_on: [] + volumes: + - "prometheus-0:/prometheus" + - "./prometheus.yaml:/etc/prometheus/prometheus.yml" + environment: {} + container_name: prometheus-0 + healthcheck: + test: + - CMD-SHELL + - sh -c 'printf "GET /-/healthy HTTP/1.0\n\n" | nc localhost 9500; exit $$?;' + interval: 1s + timeout: 5s + retries: 5 + restart: always + message_queue: + image: "docker.vectorized.io/vectorized/redpanda:latest" + command: + - redpanda + - start + - "--smp" + - "1" + - "--reserve-memory" + - 0M + - "--memory" + - 4G + - "--overprovisioned" + - "--node-id" + - "0" + - "--check=false" + - "--kafka-addr" + - "PLAINTEXT://0.0.0.0:29092,OUTSIDE://0.0.0.0:9092" + - "--advertise-kafka-addr" + - "PLAINTEXT://message_queue:29092,OUTSIDE://localhost:9092" + expose: + - "29092" + - "9092" + - "9644" + ports: + - "29092:29092" + - "9092:9092" + - "9644:9644" + - "8081:8081" + depends_on: [] + volumes: + - "message_queue:/var/lib/redpanda/data" + environment: {} + container_name: message_queue + healthcheck: + test: curl -f localhost:9644/v1/status/ready + interval: 1s + timeout: 5s + retries: 5 + restart: always +volumes: + etcd-0: + external: false + grafana-0: + external: false + minio-0: + external: false + prometheus-0: + external: false + message_queue: + external: false diff --git a/docker/docker-compose-distributed.yml b/docker/docker-compose-distributed.yml index 55cb1cbcffe3c..6a71b3488c41c 100644 --- a/docker/docker-compose-distributed.yml +++ b/docker/docker-compose-distributed.yml @@ -87,52 +87,22 @@ services: memory: 26G reservations: memory: 26G - etcd-0: - image: "quay.io/coreos/etcd:v3.5.10" - command: - - /usr/local/bin/etcd - - "--listen-client-urls" - - "http://0.0.0.0:2388" - - "--advertise-client-urls" - - "http://etcd-0:2388" - - "--listen-peer-urls" - - "http://0.0.0.0:2389" - - "--initial-advertise-peer-urls" - - "http://etcd-0:2389" - - "--listen-metrics-urls" - - "http://0.0.0.0:2379" - - "--name" - - risedev-meta - - "--max-txn-ops" - - "999999" - - "--max-request-bytes" - - "10485760" - - "--auto-compaction-mode" - - periodic - - "--auto-compaction-retention" - - 1m - - "--snapshot-count" - - "10000" - - "--data-dir" - - /etcd-data + postgres-0: + image: "postgres:15-alpine" + environment: + - POSTGRES_HOST_AUTH_METHOD=trust + - POSTGRES_USER=postgres + - POSTGRES_DB=metadata + - POSTGRES_INITDB_ARGS=--encoding=UTF-8 --lc-collate=C --lc-ctype=C expose: - - "2388" + - "5432" ports: - - "2388:2388" - - "2389:2389" - depends_on: [] + - "8432:5432" volumes: - - "etcd-0:/etcd-data" - environment: {} - container_name: etcd-0 + - "postgres-0:/var/lib/postgresql/data" healthcheck: - test: - - CMD - - etcdctl - - --endpoints=http://localhost:2388 - - endpoint - - health - interval: 1s + test: [ "CMD-SHELL", "pg_isready -U postgres" ] + interval: 2s timeout: 5s retries: 5 restart: always @@ -216,9 +186,9 @@ services: - "--prometheus-endpoint" - "http://prometheus-0:9500" - "--backend" - - etcd - - "--etcd-endpoints" - - "etcd-0:2388" + - sql + - "--sql-endpoint" + - "postgres://postgres:@postgres-0:5432/metadata" - "--state-store" - "hummock+minio://hummockadmin:hummockadmin@minio-0:9301/hummock001" - "--data-directory" @@ -233,7 +203,8 @@ services: - "5690:5690" - "5691:5691" depends_on: - - "etcd-0" + - "postgres-0" + - "minio-0" volumes: - "./risingwave.toml:/risingwave.toml" environment: @@ -367,7 +338,7 @@ services: retries: 5 restart: always volumes: - etcd-0: + postgres-0: external: false grafana-0: external: false diff --git a/docker/docker-compose-etcd.yml b/docker/docker-compose-etcd.yml new file mode 100644 index 0000000000000..05d8d16ffdf98 --- /dev/null +++ b/docker/docker-compose-etcd.yml @@ -0,0 +1,278 @@ +--- +version: "3" +x-image: &image + image: ${RW_IMAGE:-risingwavelabs/risingwave:v1.8.2} +services: + risingwave-standalone: + <<: *image + command: "standalone --meta-opts=\" \ + --listen-addr 0.0.0.0:5690 \ + --advertise-addr 0.0.0.0:5690 \ + --dashboard-host 0.0.0.0:5691 \ + --prometheus-host 0.0.0.0:1250 \ + --prometheus-endpoint http://prometheus-0:9500 \ + --backend etcd \ + --etcd-endpoints etcd-0:2388 \ + --state-store hummock+minio://hummockadmin:hummockadmin@minio-0:9301/hummock001 \ + --data-directory hummock_001 \ + --config-path /risingwave.toml\" \ + --compute-opts=\" \ + --config-path /risingwave.toml \ + --listen-addr 0.0.0.0:5688 \ + --prometheus-listener-addr 0.0.0.0:1250 \ + --advertise-addr 0.0.0.0:5688 \ + --async-stack-trace verbose \ + #--parallelism 4 \ + #--total-memory-bytes 8589934592 \ + --role both \ + --meta-address http://0.0.0.0:5690\" \ + --frontend-opts=\" \ + --config-path /risingwave.toml \ + --listen-addr 0.0.0.0:4566 \ + --advertise-addr 0.0.0.0:4566 \ + --prometheus-listener-addr 0.0.0.0:1250 \ + --health-check-listener-addr 0.0.0.0:6786 \ + --meta-addr http://0.0.0.0:5690\" \ + --compactor-opts=\" \ + --listen-addr 0.0.0.0:6660 \ + --prometheus-listener-addr 0.0.0.0:1250 \ + --advertise-addr 0.0.0.0:6660 \ + --meta-address http://0.0.0.0:5690\"" + expose: + - "6660" + - "4566" + - "5688" + - "5690" + - "1250" + - "5691" + ports: + - "4566:4566" + - "5690:5690" + - "5691:5691" + - "1250:1250" + depends_on: + - etcd-0 + - minio-0 + volumes: + - "./risingwave.toml:/risingwave.toml" + environment: + RUST_BACKTRACE: "1" + # If ENABLE_TELEMETRY is not set, telemetry will start by default + ENABLE_TELEMETRY: ${ENABLE_TELEMETRY:-true} + container_name: risingwave-standalone + healthcheck: + test: + - CMD-SHELL + - bash -c 'printf \"GET / HTTP/1.1\n\n\" > /dev/tcp/127.0.0.1/6660; exit $$?;' + - bash -c 'printf \"GET / HTTP/1.1\n\n\" > /dev/tcp/127.0.0.1/5688; exit $$?;' + - bash -c 'printf \"GET / HTTP/1.1\n\n\" > /dev/tcp/127.0.0.1/4566; exit $$?;' + - bash -c 'printf \"GET / HTTP/1.1\n\n\" > /dev/tcp/127.0.0.1/5690; exit $$?;' + interval: 1s + timeout: 5s + restart: always + deploy: + resources: + limits: + memory: 28G + reservations: + memory: 28G + + etcd-0: + image: "quay.io/coreos/etcd:v3.5.10" + command: + - /usr/local/bin/etcd + - "--listen-client-urls" + - "http://0.0.0.0:2388" + - "--advertise-client-urls" + - "http://etcd-0:2388" + - "--listen-peer-urls" + - "http://0.0.0.0:2389" + - "--initial-advertise-peer-urls" + - "http://etcd-0:2389" + - "--listen-metrics-urls" + - "http://0.0.0.0:2379" + - "--name" + - risedev-meta + - "--max-txn-ops" + - "999999" + - "--max-request-bytes" + - "10485760" + - "--auto-compaction-mode" + - periodic + - "--auto-compaction-retention" + - 1m + - "--snapshot-count" + - "10000" + - "--data-dir" + - /etcd-data + expose: + - "2388" + ports: + - "2388:2388" + - "2389:2389" + depends_on: [] + volumes: + - "etcd-0:/etcd-data" + environment: {} + container_name: etcd-0 + healthcheck: + test: + - CMD + - etcdctl + - --endpoints=http://localhost:2388 + - endpoint + - health + interval: 1s + timeout: 5s + retries: 5 + restart: always + + grafana-0: + image: "grafana/grafana-oss:latest" + command: [] + expose: + - "3001" + ports: + - "3001:3001" + depends_on: [] + volumes: + - "grafana-0:/var/lib/grafana" + - "./grafana.ini:/etc/grafana/grafana.ini" + - "./grafana-risedev-datasource.yml:/etc/grafana/provisioning/datasources/grafana-risedev-datasource.yml" + - "./grafana-risedev-dashboard.yml:/etc/grafana/provisioning/dashboards/grafana-risedev-dashboard.yml" + - "./dashboards:/dashboards" + environment: {} + container_name: grafana-0 + healthcheck: + test: + - CMD-SHELL + - bash -c 'printf \"GET / HTTP/1.1\n\n\" > /dev/tcp/127.0.0.1/3001; exit $$?;' + interval: 1s + timeout: 5s + retries: 5 + restart: always + + minio-0: + image: "quay.io/minio/minio:latest" + command: + - server + - "--address" + - "0.0.0.0:9301" + - "--console-address" + - "0.0.0.0:9400" + - /data + expose: + - "9301" + - "9400" + ports: + - "9301:9301" + - "9400:9400" + depends_on: [] + volumes: + - "minio-0:/data" + entrypoint: " + + /bin/sh -c ' + + set -e + + mkdir -p \"/data/hummock001\" + + /usr/bin/docker-entrypoint.sh \"$$0\" \"$$@\" + + '" + environment: + MINIO_CI_CD: "1" + MINIO_PROMETHEUS_AUTH_TYPE: public + MINIO_PROMETHEUS_URL: "http://prometheus-0:9500" + MINIO_ROOT_PASSWORD: hummockadmin + MINIO_ROOT_USER: hummockadmin + MINIO_DOMAIN: "minio-0" + container_name: minio-0 + healthcheck: + test: + - CMD-SHELL + - bash -c 'printf \"GET / HTTP/1.1\n\n\" > /dev/tcp/127.0.0.1/9301; exit $$?;' + interval: 1s + timeout: 5s + retries: 5 + restart: always + + prometheus-0: + image: "prom/prometheus:latest" + command: + - "--config.file=/etc/prometheus/prometheus.yml" + - "--storage.tsdb.path=/prometheus" + - "--web.console.libraries=/usr/share/prometheus/console_libraries" + - "--web.console.templates=/usr/share/prometheus/consoles" + - "--web.listen-address=0.0.0.0:9500" + - "--storage.tsdb.retention.time=30d" + expose: + - "9500" + ports: + - "9500:9500" + depends_on: [] + volumes: + - "prometheus-0:/prometheus" + - "./prometheus.yaml:/etc/prometheus/prometheus.yml" + environment: {} + container_name: prometheus-0 + healthcheck: + test: + - CMD-SHELL + - sh -c 'printf "GET /-/healthy HTTP/1.0\n\n" | nc localhost 9500; exit $$?;' + interval: 1s + timeout: 5s + retries: 5 + restart: always + + message_queue: + image: "docker.vectorized.io/vectorized/redpanda:latest" + command: + - redpanda + - start + - "--smp" + - "1" + - "--reserve-memory" + - 0M + - "--memory" + - 4G + - "--overprovisioned" + - "--node-id" + - "0" + - "--check=false" + - "--kafka-addr" + - "PLAINTEXT://0.0.0.0:29092,OUTSIDE://0.0.0.0:9092" + - "--advertise-kafka-addr" + - "PLAINTEXT://message_queue:29092,OUTSIDE://localhost:9092" + expose: + - "29092" + - "9092" + - "9644" + ports: + - "29092:29092" + - "9092:9092" + - "9644:9644" + - "8081:8081" + depends_on: [] + volumes: + - "message_queue:/var/lib/redpanda/data" + environment: {} + container_name: message_queue + healthcheck: + test: curl -f localhost:9644/v1/status/ready + interval: 1s + timeout: 5s + retries: 5 + restart: always +volumes: + etcd-0: + external: false + grafana-0: + external: false + minio-0: + external: false + prometheus-0: + external: false + message_queue: + external: false diff --git a/docker/docker-compose-with-azblob.yml b/docker/docker-compose-with-azblob.yml index e0b44c5768011..a1035180d6a68 100644 --- a/docker/docker-compose-with-azblob.yml +++ b/docker/docker-compose-with-azblob.yml @@ -1,7 +1,7 @@ --- version: "3" x-image: &image - image: ${RW_IMAGE:-risingwavelabs/risingwave:v1.8.1} + image: ${RW_IMAGE:-risingwavelabs/risingwave:v1.8.2} services: risingwave-standalone: <<: *image @@ -11,8 +11,8 @@ services: --dashboard-host 0.0.0.0:5691 \ --prometheus-host 0.0.0.0:1250 \ --prometheus-endpoint http://prometheus-0:9500 \ - --backend etcd \ - --etcd-endpoints etcd-0:2388 \ + --backend sql \ + --sql-endpoint postgres://postgres:@postgres-0:5432/metadata \ --state-store hummock+azblob:// \ --data-directory hummock_001 \ --config-path /risingwave.toml\" \ @@ -51,7 +51,7 @@ services: - "5691:5691" - "1250:1250" depends_on: - - etcd-0 + - postgres-0 env_file: multiple_object_storage.env volumes: - "./risingwave.toml:/risingwave.toml" @@ -76,10 +76,10 @@ services: memory: 28G reservations: memory: 28G - etcd-0: + postgres-0: extends: file: docker-compose.yml - service: etcd-0 + service: postgres-0 grafana-0: extends: file: docker-compose.yml @@ -93,7 +93,7 @@ services: file: docker-compose.yml service: message_queue volumes: - etcd-0: + postgres-0: external: false grafana-0: external: false diff --git a/docker/docker-compose-with-gcs.yml b/docker/docker-compose-with-gcs.yml index 847172c2d09c1..64807f252e536 100644 --- a/docker/docker-compose-with-gcs.yml +++ b/docker/docker-compose-with-gcs.yml @@ -1,7 +1,7 @@ --- version: "3" x-image: &image - image: ${RW_IMAGE:-risingwavelabs/risingwave:v1.8.1} + image: ${RW_IMAGE:-risingwavelabs/risingwave:v1.8.2} services: risingwave-standalone: <<: *image @@ -11,8 +11,8 @@ services: --dashboard-host 0.0.0.0:5691 \ --prometheus-host 0.0.0.0:1250 \ --prometheus-endpoint http://prometheus-0:9500 \ - --backend etcd \ - --etcd-endpoints etcd-0:2388 \ + --backend sql \ + --sql-endpoint postgres://postgres:@postgres-0:5432/metadata \ --state-store hummock+gcs:// \ --data-directory hummock_001 \ --config-path /risingwave.toml\" \ @@ -51,7 +51,7 @@ services: - "5691:5691" - "1250:1250" depends_on: - - etcd-0 + - postgres-0 env_file: multiple_object_storage.env volumes: - "./risingwave.toml:/risingwave.toml" @@ -76,10 +76,10 @@ services: memory: 28G reservations: memory: 28G - etcd-0: + postgres-0: extends: file: docker-compose.yml - service: etcd-0 + service: postgres-0 grafana-0: extends: file: docker-compose.yml @@ -93,7 +93,7 @@ services: file: docker-compose.yml service: message_queue volumes: - etcd-0: + postgres-0: external: false grafana-0: external: false diff --git a/docker/docker-compose-with-local-fs.yml b/docker/docker-compose-with-local-fs.yml index b45e624c619b3..c44995e063dea 100644 --- a/docker/docker-compose-with-local-fs.yml +++ b/docker/docker-compose-with-local-fs.yml @@ -1,7 +1,7 @@ --- version: "3" x-image: &image - image: ${RW_IMAGE:-risingwavelabs/risingwave:nightly-20231211} + image: ${RW_IMAGE:-risingwavelabs/risingwave:v1.8.2} services: risingwave-standalone: <<: *image @@ -11,8 +11,8 @@ services: --dashboard-host 0.0.0.0:5691 \ --prometheus-host 0.0.0.0:1250 \ --prometheus-endpoint http://prometheus-0:9500 \ - --backend etcd \ - --etcd-endpoints etcd-0:2388 \ + --backend sql \ + --sql-endpoint postgres://postgres:@postgres-0:5432/metadata \ --state-store hummock+fs:// \ --data-directory hummock_001 \ --config-path /risingwave.toml\" \ @@ -50,7 +50,7 @@ services: - "5691:5691" - "1250:1250" depends_on: - - etcd-0 + - postgres-0 volumes: - "./risingwave.toml:/risingwave.toml" environment: @@ -74,10 +74,10 @@ services: memory: reservations: memory: - etcd-0: + postgres-0: extends: file: docker-compose.yml - service: etcd-0 + service: postgres-0 grafana-0: extends: file: docker-compose.yml @@ -87,7 +87,7 @@ services: file: docker-compose.yml service: prometheus-0 volumes: - etcd-0: + postgres-0: external: false grafana-0: external: false diff --git a/docker/docker-compose-with-obs.yml b/docker/docker-compose-with-obs.yml index 5d0df0ca4f72d..634de89172b41 100644 --- a/docker/docker-compose-with-obs.yml +++ b/docker/docker-compose-with-obs.yml @@ -1,7 +1,7 @@ --- version: "3" x-image: &image - image: ${RW_IMAGE:-risingwavelabs/risingwave:v1.8.1} + image: ${RW_IMAGE:-risingwavelabs/risingwave:v1.8.2} services: risingwave-standalone: <<: *image @@ -11,8 +11,8 @@ services: --dashboard-host 0.0.0.0:5691 \ --prometheus-host 0.0.0.0:1250 \ --prometheus-endpoint http://prometheus-0:9500 \ - --backend etcd \ - --etcd-endpoints etcd-0:2388 \ + --backend sql \ + --sql-endpoint postgres://postgres:@postgres-0:5432/metadata \ --state-store hummock+obs:// \ --data-directory hummock_001 \ --config-path /risingwave.toml\" \ @@ -51,7 +51,7 @@ services: - "5691:5691" - "1250:1250" depends_on: - - etcd-0 + - postgres-0 env_file: multiple_object_storage.env volumes: - "./risingwave.toml:/risingwave.toml" @@ -76,10 +76,10 @@ services: memory: 28G reservations: memory: 28G - etcd-0: + postgres-0: extends: file: docker-compose.yml - service: etcd-0 + service: postgres-0 grafana-0: extends: file: docker-compose.yml @@ -93,7 +93,7 @@ services: file: docker-compose.yml service: message_queue volumes: - etcd-0: + postgres-0: external: false grafana-0: external: false diff --git a/docker/docker-compose-with-oss.yml b/docker/docker-compose-with-oss.yml index 7296a7074d5a6..53466e51711ed 100644 --- a/docker/docker-compose-with-oss.yml +++ b/docker/docker-compose-with-oss.yml @@ -1,7 +1,7 @@ --- version: "3" x-image: &image - image: ${RW_IMAGE:-risingwavelabs/risingwave:v1.8.1} + image: ${RW_IMAGE:-risingwavelabs/risingwave:v1.8.2} services: risingwave-standalone: <<: *image @@ -11,8 +11,8 @@ services: --dashboard-host 0.0.0.0:5691 \ --prometheus-host 0.0.0.0:1250 \ --prometheus-endpoint http://prometheus-0:9500 \ - --backend etcd \ - --etcd-endpoints etcd-0:2388 \ + --backend sql \ + --sql-endpoint postgres://postgres:@postgres-0:5432/metadata \ --state-store hummock+oss:// \ --data-directory hummock_001 \ --config-path /risingwave.toml\" \ @@ -51,7 +51,7 @@ services: - "5691:5691" - "1250:1250" depends_on: - - etcd-0 + - postgres-0 env_file: multiple_object_storage.env volumes: - "./risingwave.toml:/risingwave.toml" @@ -76,10 +76,10 @@ services: memory: 28G reservations: memory: 28G - etcd-0: + postgres-0: extends: file: docker-compose.yml - service: etcd-0 + service: postgres-0 grafana-0: extends: file: docker-compose.yml @@ -93,7 +93,7 @@ services: file: docker-compose.yml service: message_queue volumes: - etcd-0: + postgres-0: external: false grafana-0: external: false diff --git a/docker/docker-compose-with-s3.yml b/docker/docker-compose-with-s3.yml index 815489f82493e..34e1b02ddd38f 100644 --- a/docker/docker-compose-with-s3.yml +++ b/docker/docker-compose-with-s3.yml @@ -1,7 +1,7 @@ --- version: "3" x-image: &image - image: ${RW_IMAGE:-risingwavelabs/risingwave:v1.8.1} + image: ${RW_IMAGE:-risingwavelabs/risingwave:v1.8.2} services: risingwave-standalone: <<: *image @@ -11,8 +11,8 @@ services: --dashboard-host 0.0.0.0:5691 \ --prometheus-host 0.0.0.0:1250 \ --prometheus-endpoint http://prometheus-0:9500 \ - --backend etcd \ - --etcd-endpoints etcd-0:2388 \ + --backend sql \ + --sql-endpoint postgres://postgres:@postgres-0:5432/metadata \ --state-store hummock+s3:// \ --data-directory hummock_001 \ --config-path /risingwave.toml\" \ @@ -51,7 +51,7 @@ services: - "5691:5691" - "1250:1250" depends_on: - - etcd-0 + - postgres-0 env_file: aws.env volumes: - "./risingwave.toml:/risingwave.toml" @@ -76,10 +76,10 @@ services: memory: 28G reservations: memory: 28G - etcd-0: + postgres-0: extends: file: docker-compose.yml - service: etcd-0 + service: postgres-0 grafana-0: extends: file: docker-compose.yml @@ -93,7 +93,7 @@ services: file: docker-compose.yml service: message_queue volumes: - etcd-0: + postgres-0: external: false grafana-0: external: false diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 6259a5757b14f..a94324439a42c 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -1,7 +1,7 @@ --- version: "3" x-image: &image - image: ${RW_IMAGE:-risingwavelabs/risingwave:v1.8.1} + image: ${RW_IMAGE:-risingwavelabs/risingwave:v1.8.2} services: risingwave-standalone: <<: *image @@ -11,8 +11,8 @@ services: --dashboard-host 0.0.0.0:5691 \ --prometheus-host 0.0.0.0:1250 \ --prometheus-endpoint http://prometheus-0:9500 \ - --backend etcd \ - --etcd-endpoints etcd-0:2388 \ + --backend sql \ + --sql-endpoint postgres://postgres:@postgres-0:5432/metadata \ --state-store hummock+minio://hummockadmin:hummockadmin@minio-0:9301/hummock001 \ --data-directory hummock_001 \ --config-path /risingwave.toml\" \ @@ -51,7 +51,7 @@ services: - "5691:5691" - "1250:1250" depends_on: - - etcd-0 + - postgres-0 - minio-0 volumes: - "./risingwave.toml:/risingwave.toml" @@ -77,52 +77,22 @@ services: reservations: memory: 28G - etcd-0: - image: "quay.io/coreos/etcd:v3.5.10" - command: - - /usr/local/bin/etcd - - "--listen-client-urls" - - "http://0.0.0.0:2388" - - "--advertise-client-urls" - - "http://etcd-0:2388" - - "--listen-peer-urls" - - "http://0.0.0.0:2389" - - "--initial-advertise-peer-urls" - - "http://etcd-0:2389" - - "--listen-metrics-urls" - - "http://0.0.0.0:2379" - - "--name" - - risedev-meta - - "--max-txn-ops" - - "999999" - - "--max-request-bytes" - - "10485760" - - "--auto-compaction-mode" - - periodic - - "--auto-compaction-retention" - - 1m - - "--snapshot-count" - - "10000" - - "--data-dir" - - /etcd-data + postgres-0: + image: "postgres:15-alpine" + environment: + - POSTGRES_HOST_AUTH_METHOD=trust + - POSTGRES_USER=postgres + - POSTGRES_DB=metadata + - POSTGRES_INITDB_ARGS=--encoding=UTF-8 --lc-collate=C --lc-ctype=C expose: - - "2388" + - "5432" ports: - - "2388:2388" - - "2389:2389" - depends_on: [] + - "8432:5432" volumes: - - "etcd-0:/etcd-data" - environment: {} - container_name: etcd-0 + - "postgres-0:/var/lib/postgresql/data" healthcheck: - test: - - CMD - - etcdctl - - --endpoints=http://localhost:2388 - - endpoint - - health - interval: 1s + test: [ "CMD-SHELL", "pg_isready -U postgres" ] + interval: 2s timeout: 5s retries: 5 restart: always @@ -266,7 +236,7 @@ services: retries: 5 restart: always volumes: - etcd-0: + postgres-0: external: false grafana-0: external: false From ff4951448eda343d99edac9b4fafe42fb944dab4 Mon Sep 17 00:00:00 2001 From: Eric Fu Date: Tue, 14 May 2024 14:27:26 +0800 Subject: [PATCH 08/11] fix: deprecate `SinkPayloadFormat` (#16723) --- .../workflows/connector-node-integration.yml | 12 +- ci/scripts/connector-node-integration-test.sh | 6 +- java/connector-node/python-client/.gitignore | 3 +- .../python-client/integration_tests.py | 21 +- .../connector/JsonDeserializer.java | 246 ------------------ .../connector/SinkWriterStreamObserver.java | 14 +- .../connector/sink/DeserializerTest.java | 44 ---- .../sink/SinkStreamObserverTest.java | 79 +++--- proto/connector_service.proto | 24 +- src/bench/sink_bench/main.rs | 4 +- src/cmd_all/src/standalone.rs | 1 - src/compute/src/lib.rs | 4 - src/compute/src/server.rs | 20 -- src/connector/src/lib.rs | 14 - src/connector/src/sink/mod.rs | 3 - src/connector/src/sink/remote.rs | 30 +-- src/connector/src/source/reader/desc.rs | 5 - src/rpc_client/src/connector_client.rs | 2 - src/stream/src/from_proto/sink.rs | 1 - src/stream/src/from_proto/source/fs_fetch.rs | 1 - .../src/from_proto/source/trad_source.rs | 1 - src/stream/src/task/env.rs | 12 - 22 files changed, 68 insertions(+), 479 deletions(-) delete mode 100644 java/connector-node/risingwave-connector-service/src/main/java/com/risingwave/connector/JsonDeserializer.java delete mode 100644 java/connector-node/risingwave-connector-test/src/test/java/com/risingwave/connector/sink/DeserializerTest.java diff --git a/.github/workflows/connector-node-integration.yml b/.github/workflows/connector-node-integration.yml index f20f209a3953d..abc1942055fb5 100644 --- a/.github/workflows/connector-node-integration.yml +++ b/.github/workflows/connector-node-integration.yml @@ -2,11 +2,11 @@ name: Connector Node Integration Tests on: push: - branches: [main] + branches: [ main ] pull_request: - branches: [main] + branches: [ main ] merge_group: - types: [checks_requested] + types: [ checks_requested ] jobs: build: @@ -42,4 +42,8 @@ jobs: echo "--- build connector node" cd ${RISINGWAVE_ROOT}/java # run unit test - mvn --batch-mode --update-snapshots clean package -Dno-build-rust + # WARN: `testOnNext_writeValidation` is skipped because it relies on Rust code to decode message, + # while we don't build Rust code (`-Dno-build-rust`) here to save time + mvn --batch-mode --update-snapshots clean package -Dno-build-rust \ + '-Dtest=!com.risingwave.connector.sink.SinkStreamObserverTest#testOnNext_writeValidation' \ + -Dsurefire.failIfNoSpecifiedTests=false diff --git a/ci/scripts/connector-node-integration-test.sh b/ci/scripts/connector-node-integration-test.sh index c90430d80ba3e..8853243b66805 100755 --- a/ci/scripts/connector-node-integration-test.sh +++ b/ci/scripts/connector-node-integration-test.sh @@ -90,15 +90,15 @@ export PYTHONPATH=proto echo "--- running streamchunk data format integration tests" cd "${RISINGWAVE_ROOT}"/java/connector-node/python-client -if python3 integration_tests.py --stream_chunk_format_test --input_binary_file="./data/stream_chunk_data" --data_format_use_json=False; then +if python3 integration_tests.py --stream_chunk_format_test --input_binary_file="./data/stream_chunk_data"; then echo "StreamChunk data format test passed" else echo "StreamChunk data format test failed" exit 1 fi -sink_input_feature=("--input_binary_file=./data/sink_input --data_format_use_json=False") -upsert_sink_input_feature=("--input_binary_file=./data/upsert_sink_input --data_format_use_json=False") +sink_input_feature=("--input_binary_file=./data/sink_input") +upsert_sink_input_feature=("--input_binary_file=./data/upsert_sink_input") type=("StreamChunk format") ${MC_PATH} mb minio/bucket diff --git a/java/connector-node/python-client/.gitignore b/java/connector-node/python-client/.gitignore index 600d2d33badf4..536c32383d754 100644 --- a/java/connector-node/python-client/.gitignore +++ b/java/connector-node/python-client/.gitignore @@ -1 +1,2 @@ -.vscode \ No newline at end of file +.vscode +sink-client-venv/ diff --git a/java/connector-node/python-client/integration_tests.py b/java/connector-node/python-client/integration_tests.py index b16b5eaf34ad4..909859afc218a 100644 --- a/java/connector-node/python-client/integration_tests.py +++ b/java/connector-node/python-client/integration_tests.py @@ -117,7 +117,7 @@ def load_stream_chunk_payload(input_file): return payloads -def test_sink(prop, format, payload_input, table_schema, is_coordinated=False): +def test_sink(prop, payload_input, table_schema, is_coordinated=False): sink_param = connector_service_pb2.SinkParam( sink_id=0, properties=prop, @@ -128,7 +128,6 @@ def test_sink(prop, format, payload_input, table_schema, is_coordinated=False): request_list = [ connector_service_pb2.SinkWriterStreamRequest( start=connector_service_pb2.SinkWriterStreamRequest.StartSink( - format=format, sink_param=sink_param, ) ) @@ -291,9 +290,6 @@ def test_stream_chunk_data_format(param): parser.add_argument( "--deltalake_sink", action="store_true", help="run deltalake sink test" ) - parser.add_argument( - "--input_file", default="./data/sink_input.json", help="input data to run tests" - ) parser.add_argument( "--input_binary_file", default="./data/sink_input", @@ -302,29 +298,18 @@ def test_stream_chunk_data_format(param): parser.add_argument( "--es_sink", action="store_true", help="run elasticsearch sink test" ) - parser.add_argument( - "--data_format_use_json", default=True, help="choose json or streamchunk" - ) args = parser.parse_args() - use_json = args.data_format_use_json == True or args.data_format_use_json == "True" - if use_json: - payload = load_json_payload(args.input_file) - format = connector_service_pb2.SinkPayloadFormat.JSON - else: - payload = load_stream_chunk_payload(args.input_binary_file) - format = connector_service_pb2.SinkPayloadFormat.STREAM_CHUNK + payload = load_stream_chunk_payload(args.input_binary_file) # stream chunk format if args.stream_chunk_format_test: param = { - "format": format, "payload_input": payload, "table_schema": make_mock_schema_stream_chunk(), } test_stream_chunk_data_format(param) param = { - "format": format, "payload_input": payload, "table_schema": make_mock_schema(), } @@ -337,7 +322,5 @@ def test_stream_chunk_data_format(param): test_deltalake_sink(param) if args.es_sink: test_elasticsearch_sink(param) - - # json format if args.upsert_iceberg_sink: test_upsert_iceberg_sink(param) diff --git a/java/connector-node/risingwave-connector-service/src/main/java/com/risingwave/connector/JsonDeserializer.java b/java/connector-node/risingwave-connector-service/src/main/java/com/risingwave/connector/JsonDeserializer.java deleted file mode 100644 index c941b09efe95c..0000000000000 --- a/java/connector-node/risingwave-connector-service/src/main/java/com/risingwave/connector/JsonDeserializer.java +++ /dev/null @@ -1,246 +0,0 @@ -// Copyright 2024 RisingWave Labs -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package com.risingwave.connector; - -import static io.grpc.Status.INVALID_ARGUMENT; - -import com.google.gson.Gson; -import com.risingwave.connector.api.TableSchema; -import com.risingwave.connector.api.sink.*; -import com.risingwave.proto.ConnectorServiceProto; -import com.risingwave.proto.ConnectorServiceProto.SinkWriterStreamRequest.WriteBatch.JsonPayload; -import com.risingwave.proto.Data; -import java.math.BigDecimal; -import java.time.LocalDate; -import java.time.LocalDateTime; -import java.time.LocalTime; -import java.time.OffsetDateTime; -import java.util.Base64; -import java.util.Map; -import java.util.stream.Collectors; - -public class JsonDeserializer implements Deserializer { - private final TableSchema tableSchema; - - public JsonDeserializer(TableSchema tableSchema) { - this.tableSchema = tableSchema; - } - - // Encoding here should be consistent with `datum_to_json_object()` in - // src/connector/src/sink/mod.rs - @Override - public CloseableIterable deserialize( - ConnectorServiceProto.SinkWriterStreamRequest.WriteBatch writeBatch) { - if (!writeBatch.hasJsonPayload()) { - throw INVALID_ARGUMENT - .withDescription("expected JsonPayload, got " + writeBatch.getPayloadCase()) - .asRuntimeException(); - } - JsonPayload jsonPayload = writeBatch.getJsonPayload(); - return new TrivialCloseIterable<>( - jsonPayload.getRowOpsList().stream() - .map( - rowOp -> { - Map columnValues = - new Gson().fromJson(rowOp.getLine(), Map.class); - Object[] values = new Object[columnValues.size()]; - for (String columnName : tableSchema.getColumnNames()) { - if (!columnValues.containsKey(columnName)) { - throw INVALID_ARGUMENT - .withDescription( - "column " - + columnName - + " not found in json") - .asRuntimeException(); - } - Data.DataType.TypeName typeName = - tableSchema.getColumnType(columnName); - values[tableSchema.getColumnIndex(columnName)] = - validateJsonDataTypes( - typeName, columnValues.get(columnName)); - } - return (SinkRow) new ArraySinkRow(rowOp.getOpType(), values); - }) - .collect(Collectors.toList())); - } - - private static Long castLong(Object value) { - if (value instanceof Integer) { - return ((Integer) value).longValue(); - } else if (value instanceof Double) { - double d = (Double) value; - if (d % 1.0 != 0.0) { - - throw io.grpc.Status.INVALID_ARGUMENT - .withDescription( - "unable to cast into long from non-integer double value: " + d) - .asRuntimeException(); - } - return ((Double) value).longValue(); - } else if (value instanceof Long) { - return (Long) value; - } else if (value instanceof Short) { - return ((Short) value).longValue(); - } else if (value instanceof Float) { - double f = (Float) value; - if (f % 1.0 != 0.0) { - - throw io.grpc.Status.INVALID_ARGUMENT - .withDescription( - "unable to cast into long from non-integer float value: " + f) - .asRuntimeException(); - } - return ((Float) value).longValue(); - } else { - throw io.grpc.Status.INVALID_ARGUMENT - .withDescription("unable to cast into long from " + value.getClass()) - .asRuntimeException(); - } - } - - private static Double castDouble(Object value) { - if (value instanceof Double) { - return (Double) value; - } else if (value instanceof Float) { - return ((Float) value).doubleValue(); - } else { - throw io.grpc.Status.INVALID_ARGUMENT - .withDescription("unable to cast into double from " + value.getClass()) - .asRuntimeException(); - } - } - - private static BigDecimal castDecimal(Object value) { - if (value instanceof String) { - // FIXME(eric): See `datum_to_json_object()` in src/connector/src/sink/mod.rs - return new BigDecimal((String) value); - } else if (value instanceof BigDecimal) { - return (BigDecimal) value; - } else { - throw io.grpc.Status.INVALID_ARGUMENT - .withDescription("unable to cast into double from " + value.getClass()) - .asRuntimeException(); - } - } - - private static LocalTime castTime(Object value) { - try { - Long milli = castLong(value); - return LocalTime.ofNanoOfDay(milli * 1_000_000L); - } catch (RuntimeException e) { - throw io.grpc.Status.INVALID_ARGUMENT - .withDescription("unable to cast into time from " + value.getClass()) - .asRuntimeException(); - } - } - - private static LocalDate castDate(Object value) { - try { - Long days = castLong(value); - return LocalDate.ofEpochDay(days); - } catch (RuntimeException e) { - throw io.grpc.Status.INVALID_ARGUMENT - .withDescription("unable to cast into date from " + value.getClass()) - .asRuntimeException(); - } - } - - private static Object validateJsonDataTypes(Data.DataType.TypeName typeName, Object value) { - // value might be null - if (value == null) { - return null; - } - switch (typeName) { - case INT16: - return castLong(value).shortValue(); - case INT32: - return castLong(value).intValue(); - case INT64: - return castLong(value); - case VARCHAR: - if (!(value instanceof String)) { - throw io.grpc.Status.INVALID_ARGUMENT - .withDescription("Expected string, got " + value.getClass()) - .asRuntimeException(); - } - return value; - case DOUBLE: - return castDouble(value); - case FLOAT: - return castDouble(value).floatValue(); - case DECIMAL: - return castDecimal(value); - case BOOLEAN: - if (!(value instanceof Boolean)) { - throw io.grpc.Status.INVALID_ARGUMENT - .withDescription("Expected boolean, got " + value.getClass()) - .asRuntimeException(); - } - return value; - case TIMESTAMP: - if (!(value instanceof String)) { - throw io.grpc.Status.INVALID_ARGUMENT - .withDescription( - "Expected timestamp in string, got " + value.getClass()) - .asRuntimeException(); - } - return LocalDateTime.parse((String) value); - case TIMESTAMPTZ: - if (!(value instanceof String)) { - throw io.grpc.Status.INVALID_ARGUMENT - .withDescription( - "Expected timestamptz in string, got " + value.getClass()) - .asRuntimeException(); - } - return OffsetDateTime.parse((String) value); - case TIME: - return castTime(value); - case DATE: - return castDate(value); - case INTERVAL: - if (!(value instanceof String)) { - throw io.grpc.Status.INVALID_ARGUMENT - .withDescription("Expected interval, got " + value.getClass()) - .asRuntimeException(); - } - return value; - case JSONB: - if (!(value instanceof String)) { - throw io.grpc.Status.INVALID_ARGUMENT - .withDescription("Expected jsonb, got " + value.getClass()) - .asRuntimeException(); - } - return value; - case BYTEA: - if (!(value instanceof String)) { - throw io.grpc.Status.INVALID_ARGUMENT - .withDescription("Expected bytea, got " + value.getClass()) - .asRuntimeException(); - } - return Base64.getDecoder().decode((String) value); - case LIST: - if (!(value instanceof java.util.ArrayList)) { - throw io.grpc.Status.INVALID_ARGUMENT - .withDescription("Expected list, got " + value.getClass()) - .asRuntimeException(); - } - return ((java.util.ArrayList) value).toArray(); - default: - throw io.grpc.Status.INVALID_ARGUMENT - .withDescription("unsupported type " + typeName) - .asRuntimeException(); - } - } -} diff --git a/java/connector-node/risingwave-connector-service/src/main/java/com/risingwave/connector/SinkWriterStreamObserver.java b/java/connector-node/risingwave-connector-service/src/main/java/com/risingwave/connector/SinkWriterStreamObserver.java index cd61da38d6cb5..53dfe326fbd9d 100644 --- a/java/connector-node/risingwave-connector-service/src/main/java/com/risingwave/connector/SinkWriterStreamObserver.java +++ b/java/connector-node/risingwave-connector-service/src/main/java/com/risingwave/connector/SinkWriterStreamObserver.java @@ -206,19 +206,7 @@ private void bindSink(ConnectorServiceProto.SinkWriterStreamRequest.StartSink st String connectorName = getConnectorName(sinkParam); SinkFactory sinkFactory = SinkUtils.getSinkFactory(connectorName); sink = sinkFactory.createWriter(tableSchema, sinkParam.getPropertiesMap()); - switch (startSink.getFormat()) { - case FORMAT_UNSPECIFIED: - case UNRECOGNIZED: - throw INVALID_ARGUMENT - .withDescription("should specify payload format in request") - .asRuntimeException(); - case JSON: - deserializer = new JsonDeserializer(tableSchema); - break; - case STREAM_CHUNK: - deserializer = new StreamChunkDeserializer(tableSchema); - break; - } + deserializer = new StreamChunkDeserializer(tableSchema); this.connectorName = connectorName.toUpperCase(); ConnectorNodeMetrics.incActiveSinkConnections(connectorName, "node1"); } diff --git a/java/connector-node/risingwave-connector-test/src/test/java/com/risingwave/connector/sink/DeserializerTest.java b/java/connector-node/risingwave-connector-test/src/test/java/com/risingwave/connector/sink/DeserializerTest.java deleted file mode 100644 index 9284a2ef8fd20..0000000000000 --- a/java/connector-node/risingwave-connector-test/src/test/java/com/risingwave/connector/sink/DeserializerTest.java +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright 2024 RisingWave Labs -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package com.risingwave.connector.sink; - -import com.risingwave.connector.JsonDeserializer; -import com.risingwave.connector.TestUtils; -import com.risingwave.connector.api.sink.SinkRow; -import com.risingwave.proto.ConnectorServiceProto; -import com.risingwave.proto.ConnectorServiceProto.SinkWriterStreamRequest.WriteBatch.JsonPayload; -import com.risingwave.proto.Data; -import junit.framework.TestCase; - -public class DeserializerTest extends TestCase { - public void testJsonDeserializer() { - JsonDeserializer deserializer = new JsonDeserializer(TestUtils.getMockTableSchema()); - JsonPayload jsonPayload = - JsonPayload.newBuilder() - .addRowOps( - JsonPayload.RowOp.newBuilder() - .setOpType(Data.Op.INSERT) - .setLine("{\"id\": 1, \"name\": \"John\"}") - .build()) - .build(); - ConnectorServiceProto.SinkWriterStreamRequest.WriteBatch writeBatch = - ConnectorServiceProto.SinkWriterStreamRequest.WriteBatch.newBuilder() - .setJsonPayload(jsonPayload) - .build(); - SinkRow outcome = deserializer.deserialize(writeBatch).iterator().next(); - assertEquals(outcome.get(0), 1); - assertEquals(outcome.get(1), "John"); - } -} diff --git a/java/connector-node/risingwave-connector-test/src/test/java/com/risingwave/connector/sink/SinkStreamObserverTest.java b/java/connector-node/risingwave-connector-test/src/test/java/com/risingwave/connector/sink/SinkStreamObserverTest.java index f0dcc4c1c4930..885fc7eb927a3 100644 --- a/java/connector-node/risingwave-connector-test/src/test/java/com/risingwave/connector/sink/SinkStreamObserverTest.java +++ b/java/connector-node/risingwave-connector-test/src/test/java/com/risingwave/connector/sink/SinkStreamObserverTest.java @@ -14,10 +14,10 @@ package com.risingwave.connector.sink; +import com.google.protobuf.ByteString; import com.risingwave.connector.SinkWriterStreamObserver; import com.risingwave.connector.TestUtils; import com.risingwave.proto.ConnectorServiceProto; -import com.risingwave.proto.Data.Op; import io.grpc.stub.StreamObserver; import java.util.Map; import org.junit.Assert; @@ -94,7 +94,6 @@ public void testOnNext_syncValidation() { .setStart( ConnectorServiceProto.SinkWriterStreamRequest.StartSink.newBuilder() .setSinkParam(fileSinkParam) - .setFormat(ConnectorServiceProto.SinkPayloadFormat.JSON) .build()) .build(); ConnectorServiceProto.SinkWriterStreamRequest firstSync = @@ -138,7 +137,6 @@ public void testOnNext_startEpochValidation() { .setStart( ConnectorServiceProto.SinkWriterStreamRequest.StartSink.newBuilder() .setSinkParam(fileSinkParam) - .setFormat(ConnectorServiceProto.SinkPayloadFormat.JSON) .build()) .build(); ConnectorServiceProto.SinkWriterStreamRequest firstSync = @@ -156,6 +154,8 @@ public void testOnNext_startEpochValidation() { sinkWriterStreamObserver.onNext(firstSync); } + // WARN! This test is skipped in CI pipeline see + // `.github/workflows/connector-node-integration.yml` @Test public void testOnNext_writeValidation() { SinkWriterStreamObserver sinkWriterStreamObserver; @@ -164,10 +164,16 @@ public void testOnNext_writeValidation() { ConnectorServiceProto.SinkWriterStreamRequest.newBuilder() .setStart( ConnectorServiceProto.SinkWriterStreamRequest.StartSink.newBuilder() - .setFormat(ConnectorServiceProto.SinkPayloadFormat.JSON) .setSinkParam(fileSinkParam)) .build(); + // Encoded StreamChunk: 1 'test' + byte[] data1 = + new byte[] { + 8, 1, 18, 1, 1, 26, 20, 8, 2, 18, 6, 8, 1, 18, 2, 1, 1, 26, 8, 8, 1, 18, 4, 0, + 0, 0, 1, 26, 42, 8, 6, 18, 6, 8, 1, 18, 2, 1, 1, 26, 20, 8, 1, 18, 16, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 26, 8, 8, 1, 18, 4, 116, 101, 115, 116 + }; ConnectorServiceProto.SinkWriterStreamRequest firstWrite = ConnectorServiceProto.SinkWriterStreamRequest.newBuilder() .setWriteBatch( @@ -175,19 +181,11 @@ public void testOnNext_writeValidation() { .newBuilder() .setEpoch(0) .setBatchId(1) - .setJsonPayload( + .setStreamChunkPayload( ConnectorServiceProto.SinkWriterStreamRequest - .WriteBatch.JsonPayload.newBuilder() - .addRowOps( - ConnectorServiceProto - .SinkWriterStreamRequest - .WriteBatch.JsonPayload - .RowOp.newBuilder() - .setOpType(Op.INSERT) - .setLine( - "{\"id\": 1, \"name\": \"test\"}") - .build())) - .build()) + .WriteBatch.StreamChunkPayload.newBuilder() + .setBinaryData(ByteString.copyFrom(data1)) + .build())) .build(); ConnectorServiceProto.SinkWriterStreamRequest firstSync = @@ -199,6 +197,13 @@ public void testOnNext_writeValidation() { .build()) .build(); + // Encoded StreamChunk: 2 'test' + byte[] data2 = + new byte[] { + 8, 1, 18, 1, 1, 26, 20, 8, 2, 18, 6, 8, 1, 18, 2, 1, 1, 26, 8, 8, 1, 18, 4, 0, + 0, 0, 2, 26, 42, 8, 6, 18, 6, 8, 1, 18, 2, 1, 1, 26, 20, 8, 1, 18, 16, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 26, 8, 8, 1, 18, 4, 116, 101, 115, 116 + }; ConnectorServiceProto.SinkWriterStreamRequest secondWrite = ConnectorServiceProto.SinkWriterStreamRequest.newBuilder() .setWriteBatch( @@ -206,19 +211,11 @@ public void testOnNext_writeValidation() { .newBuilder() .setEpoch(1) .setBatchId(2) - .setJsonPayload( + .setStreamChunkPayload( ConnectorServiceProto.SinkWriterStreamRequest - .WriteBatch.JsonPayload.newBuilder() - .addRowOps( - ConnectorServiceProto - .SinkWriterStreamRequest - .WriteBatch.JsonPayload - .RowOp.newBuilder() - .setOpType(Op.INSERT) - .setLine( - "{\"id\": 2, \"name\": \"test\"}") - .build())) - .build()) + .WriteBatch.StreamChunkPayload.newBuilder() + .setBinaryData(ByteString.copyFrom(data2)) + .build())) .build(); ConnectorServiceProto.SinkWriterStreamRequest secondWriteWrongEpoch = @@ -228,19 +225,11 @@ public void testOnNext_writeValidation() { .newBuilder() .setEpoch(2) .setBatchId(3) - .setJsonPayload( + .setStreamChunkPayload( ConnectorServiceProto.SinkWriterStreamRequest - .WriteBatch.JsonPayload.newBuilder() - .addRowOps( - ConnectorServiceProto - .SinkWriterStreamRequest - .WriteBatch.JsonPayload - .RowOp.newBuilder() - .setOpType(Op.INSERT) - .setLine( - "{\"id\": 2, \"name\": \"test\"}") - .build())) - .build()) + .WriteBatch.StreamChunkPayload.newBuilder() + .setBinaryData(ByteString.copyFrom(data2)) + .build())) .build(); boolean exceptionThrown = false; @@ -251,7 +240,10 @@ public void testOnNext_writeValidation() { sinkWriterStreamObserver.onNext(firstWrite); } catch (RuntimeException e) { exceptionThrown = true; - Assert.assertTrue(e.getMessage().toLowerCase().contains("batch id")); + if (!e.getMessage().toLowerCase().contains("batch id")) { + e.printStackTrace(); + Assert.fail("Expected `batch id`, but got " + e.getMessage()); + } } if (!exceptionThrown) { Assert.fail("Expected exception not thrown: `invalid batch id`"); @@ -267,7 +259,10 @@ public void testOnNext_writeValidation() { sinkWriterStreamObserver.onNext(secondWriteWrongEpoch); } catch (RuntimeException e) { exceptionThrown = true; - Assert.assertTrue(e.getMessage().toLowerCase().contains("invalid epoch")); + if (!e.getMessage().toLowerCase().contains("invalid epoch")) { + e.printStackTrace(); + Assert.fail("Expected `invalid epoch`, but got " + e.getMessage()); + } } if (!exceptionThrown) { Assert.fail( diff --git a/proto/connector_service.proto b/proto/connector_service.proto index 4deb0d6fb6096..da2c2b88087ea 100644 --- a/proto/connector_service.proto +++ b/proto/connector_service.proto @@ -4,7 +4,6 @@ package connector_service; import "catalog.proto"; import "common.proto"; -import "data.proto"; import "plan_common.proto"; option java_outer_classname = "ConnectorServiceProto"; @@ -30,34 +29,21 @@ message SinkParam { string sink_name = 8; } -enum SinkPayloadFormat { - FORMAT_UNSPECIFIED = 0; - JSON = 1; - STREAM_CHUNK = 2; -} - message SinkWriterStreamRequest { message StartSink { SinkParam sink_param = 1; - SinkPayloadFormat format = 2; + // Deprecated: SinkPayloadFormat format = 2; + reserved "format"; + reserved 2; TableSchema payload_schema = 3; } message WriteBatch { - message JsonPayload { - message RowOp { - data.Op op_type = 1; - string line = 2; - } - repeated RowOp row_ops = 1; - } - message StreamChunkPayload { bytes binary_data = 1; } oneof payload { - JsonPayload json_payload = 1; StreamChunkPayload stream_chunk_payload = 2; // This is a reference pointer to a StreamChunk. The StreamChunk is owned // by the JniSinkWriterStreamRequest, which should handle the release of StreamChunk. @@ -65,6 +51,10 @@ message SinkWriterStreamRequest { int64 stream_chunk_ref_pointer = 5; } + // Deprecated in oneof payload: JsonPayload json_payload = 1; + reserved "json_payload"; + reserved 1; + uint64 batch_id = 3; uint64 epoch = 4; } diff --git a/src/bench/sink_bench/main.rs b/src/bench/sink_bench/main.rs index 033cb47ba5a24..cfb506fa2efab 100644 --- a/src/bench/sink_bench/main.rs +++ b/src/bench/sink_bench/main.rs @@ -49,7 +49,6 @@ use risingwave_connector::source::datagen::{ use risingwave_connector::source::{ Column, DataType, SourceContext, SourceEnumeratorContext, SplitEnumerator, SplitReader, }; -use risingwave_pb::connector_service::SinkPayloadFormat; use risingwave_stream::executor::test_utils::prelude::ColumnDesc; use risingwave_stream::executor::{Barrier, Message, MessageStreamItem, StreamExecutorError}; use serde::{Deserialize, Deserializer}; @@ -472,9 +471,8 @@ async fn main() { sink_from_name: "not_need_set".to_string(), }; let sink = build_sink(sink_param).unwrap(); - let mut sink_writer_param = SinkWriterParam::for_test(); + let sink_writer_param = SinkWriterParam::for_test(); println!("Start Sink Bench!, Wait {:?}s", BENCH_TIME); - sink_writer_param.connector_params.sink_payload_format = SinkPayloadFormat::StreamChunk; tokio::spawn(async move { dispatch_sink!(sink, sink, { consume_log_stream(sink, mock_range_log_reader, sink_writer_param).boxed() diff --git a/src/cmd_all/src/standalone.rs b/src/cmd_all/src/standalone.rs index 62030a3c84aff..819aba03d865e 100644 --- a/src/cmd_all/src/standalone.rs +++ b/src/cmd_all/src/standalone.rs @@ -356,7 +356,6 @@ mod test { http://127.0.0.1:5690/, ], ), - connector_rpc_sink_payload_format: None, config_path: "src/config/test.toml", total_memory_bytes: 34359738368, reserved_memory_bytes: None, diff --git a/src/compute/src/lib.rs b/src/compute/src/lib.rs index 344e0c781eb5e..2c25f7808d93d 100644 --- a/src/compute/src/lib.rs +++ b/src/compute/src/lib.rs @@ -76,10 +76,6 @@ pub struct ComputeNodeOpts { #[clap(long, env = "RW_META_ADDR", default_value = "http://127.0.0.1:5690")] pub meta_address: MetaAddressStrategy, - /// Payload format of connector sink rpc - #[clap(long, env = "RW_CONNECTOR_RPC_SINK_PAYLOAD_FORMAT")] - pub connector_rpc_sink_payload_format: Option, - /// The path of `risingwave.toml` configuration file. /// /// If empty, default configuration values will be used. diff --git a/src/compute/src/server.rs b/src/compute/src/server.rs index 07dee5dfa4c8c..b9cd6fd09f751 100644 --- a/src/compute/src/server.rs +++ b/src/compute/src/server.rs @@ -42,7 +42,6 @@ use risingwave_connector::source::monitor::GLOBAL_SOURCE_METRICS; use risingwave_dml::dml_manager::DmlManager; use risingwave_pb::common::WorkerType; use risingwave_pb::compute::config_service_server::ConfigServiceServer; -use risingwave_pb::connector_service::SinkPayloadFormat; use risingwave_pb::health::health_server::HealthServer; use risingwave_pb::meta::add_worker_node_request::Property; use risingwave_pb::monitor_service::monitor_service_server::MonitorServiceServer; @@ -334,28 +333,9 @@ pub async fn compute_node_serve( config.server.metrics_level, ); - info!( - "connector param: payload_format={:?}", - opts.connector_rpc_sink_payload_format - ); - - let connector_params = risingwave_connector::ConnectorParams { - sink_payload_format: match opts.connector_rpc_sink_payload_format.as_deref() { - None | Some("stream_chunk") => SinkPayloadFormat::StreamChunk, - Some("json") => SinkPayloadFormat::Json, - _ => { - unreachable!( - "invalid sink payload format: {:?}. Should be either json or stream_chunk", - opts.connector_rpc_sink_payload_format - ) - } - }, - }; - // Initialize the streaming environment. let stream_env = StreamEnvironment::new( advertise_addr.clone(), - connector_params, stream_config, worker_id, state_store, diff --git a/src/connector/src/lib.rs b/src/connector/src/lib.rs index 70de9f8561a76..c866a68b298d6 100644 --- a/src/connector/src/lib.rs +++ b/src/connector/src/lib.rs @@ -41,7 +41,6 @@ use std::time::Duration; use duration_str::parse_std; -use risingwave_pb::connector_service::SinkPayloadFormat; use serde::de; pub mod aws_utils; @@ -64,19 +63,6 @@ pub use with_options::WithPropertiesExt; #[cfg(test)] mod with_options_test; -#[derive(Clone, Debug, Default)] -pub struct ConnectorParams { - pub sink_payload_format: SinkPayloadFormat, -} - -impl ConnectorParams { - pub fn new(sink_payload_format: SinkPayloadFormat) -> Self { - Self { - sink_payload_format, - } - } -} - pub(crate) fn deserialize_u32_from_string<'de, D>(deserializer: D) -> Result where D: de::Deserializer<'de>, diff --git a/src/connector/src/sink/mod.rs b/src/connector/src/sink/mod.rs index d281a97ef6c26..2ef4bb953b67e 100644 --- a/src/connector/src/sink/mod.rs +++ b/src/connector/src/sink/mod.rs @@ -71,7 +71,6 @@ use crate::sink::catalog::desc::SinkDesc; use crate::sink::catalog::{SinkCatalog, SinkId}; use crate::sink::log_store::{LogReader, LogStoreReadItem, LogStoreResult, TruncateOffset}; use crate::sink::writer::SinkWriter; -use crate::ConnectorParams; const BOUNDED_CHANNEL_SIZE: usize = 16; #[macro_export] @@ -288,7 +287,6 @@ impl SinkMetrics { #[derive(Clone)] pub struct SinkWriterParam { - pub connector_params: ConnectorParams, pub executor_id: u64, pub vnode_bitmap: Option, pub meta_client: Option, @@ -326,7 +324,6 @@ impl SinkMetaClient { impl SinkWriterParam { pub fn for_test() -> Self { SinkWriterParam { - connector_params: Default::default(), executor_id: Default::default(), vnode_bitmap: Default::default(), meta_client: Default::default(), diff --git a/src/connector/src/sink/remote.rs b/src/connector/src/sink/remote.rs index 3ab7e90a69367..679434d08b194 100644 --- a/src/connector/src/sink/remote.rs +++ b/src/connector/src/sink/remote.rs @@ -42,8 +42,8 @@ use risingwave_pb::connector_service::sink_writer_stream_request::{ use risingwave_pb::connector_service::{ sink_coordinator_stream_request, sink_coordinator_stream_response, sink_writer_stream_response, PbSinkParam, SinkCoordinatorStreamRequest, SinkCoordinatorStreamResponse, SinkMetadata, - SinkPayloadFormat, SinkWriterStreamRequest, SinkWriterStreamResponse, TableSchema, - ValidateSinkRequest, ValidateSinkResponse, + SinkWriterStreamRequest, SinkWriterStreamResponse, TableSchema, ValidateSinkRequest, + ValidateSinkResponse, }; use risingwave_rpc_client::error::RpcError; use risingwave_rpc_client::{ @@ -68,7 +68,6 @@ use crate::sink::{ DummySinkCommitCoordinator, LogSinker, Result, Sink, SinkCommitCoordinator, SinkError, SinkLogReader, SinkMetrics, SinkParam, SinkWriterParam, }; -use crate::ConnectorParams; macro_rules! def_remote_sink { () => { @@ -82,7 +81,6 @@ macro_rules! def_remote_sink { { HttpJava, HttpJavaSink, "http" } } }; - () => {}; ({ $variant_name:ident, $sink_type_name:ident, $sink_name:expr }) => { #[derive(Debug)] pub struct $variant_name; @@ -283,7 +281,7 @@ impl RemoteLogSinker { request_sender, response_stream, } = EmbeddedConnectorClient::new()? - .start_sink_writer_stream(payload_schema, sink_proto, SinkPayloadFormat::StreamChunk) + .start_sink_writer_stream(payload_schema, sink_proto) .await?; let sink_metrics = writer_param.sink_metrics; @@ -547,12 +545,8 @@ impl Sink for CoordinatedRemoteSink { "sink needs coordination should not have singleton input" )) })?, - CoordinatedRemoteSinkWriter::new( - self.param.clone(), - writer_param.connector_params, - writer_param.sink_metrics.clone(), - ) - .await?, + CoordinatedRemoteSinkWriter::new(self.param.clone(), writer_param.sink_metrics.clone()) + .await?, ) .await? .into_log_sinker(writer_param.sink_metrics)) @@ -572,18 +566,10 @@ pub struct CoordinatedRemoteSinkWriter { } impl CoordinatedRemoteSinkWriter { - pub async fn new( - param: SinkParam, - connector_params: ConnectorParams, - sink_metrics: SinkMetrics, - ) -> Result { + pub async fn new(param: SinkParam, sink_metrics: SinkMetrics) -> Result { let sink_proto = param.to_proto(); let stream_handle = EmbeddedConnectorClient::new()? - .start_sink_writer_stream( - sink_proto.table_schema.clone(), - sink_proto, - connector_params.sink_payload_format, - ) + .start_sink_writer_stream(sink_proto.table_schema.clone(), sink_proto) .await?; Ok(Self { @@ -717,13 +703,11 @@ impl EmbeddedConnectorClient { &self, payload_schema: Option, sink_proto: PbSinkParam, - sink_payload_format: SinkPayloadFormat, ) -> Result> { let (handle, first_rsp) = SinkWriterStreamHandle::initialize( SinkWriterStreamRequest { request: Some(SinkRequest::Start(StartSink { sink_param: Some(sink_proto), - format: sink_payload_format as i32, payload_schema, })), }, diff --git a/src/connector/src/source/reader/desc.rs b/src/connector/src/source/reader/desc.rs index 808ef1232c50c..cbd63a2a4906a 100644 --- a/src/connector/src/source/reader/desc.rs +++ b/src/connector/src/source/reader/desc.rs @@ -30,7 +30,6 @@ use crate::parser::additional_columns::add_partition_offset_cols; use crate::parser::{EncodingProperties, ProtocolProperties, SpecificParserConfig}; use crate::source::monitor::SourceMetrics; use crate::source::{SourceColumnDesc, SourceColumnType, UPSTREAM_SOURCE_KEY}; -use crate::ConnectorParams; pub const DEFAULT_CONNECTOR_MESSAGE_BUFFER_SIZE: usize = 16; @@ -59,7 +58,6 @@ pub struct SourceDescBuilder { row_id_index: Option, with_properties: HashMap, source_info: PbStreamSourceInfo, - connector_params: ConnectorParams, connector_message_buffer_size: usize, pk_indices: Vec, } @@ -72,7 +70,6 @@ impl SourceDescBuilder { row_id_index: Option, with_properties: HashMap, source_info: PbStreamSourceInfo, - connector_params: ConnectorParams, connector_message_buffer_size: usize, pk_indices: Vec, ) -> Self { @@ -82,7 +79,6 @@ impl SourceDescBuilder { row_id_index, with_properties, source_info, - connector_params, connector_message_buffer_size, pk_indices, } @@ -223,7 +219,6 @@ pub mod test_utils { row_id_index, with_properties, source_info, - connector_params: Default::default(), connector_message_buffer_size: DEFAULT_CONNECTOR_MESSAGE_BUFFER_SIZE, pk_indices, } diff --git a/src/rpc_client/src/connector_client.rs b/src/rpc_client/src/connector_client.rs index f4a87e3c1f5b4..3042394b25877 100644 --- a/src/rpc_client/src/connector_client.rs +++ b/src/rpc_client/src/connector_client.rs @@ -278,7 +278,6 @@ impl ConnectorClient { &self, payload_schema: Option, sink_proto: PbSinkParam, - sink_payload_format: SinkPayloadFormat, ) -> Result { let mut rpc_client = self.rpc_client.clone(); let (handle, first_rsp) = SinkWriterStreamHandle::initialize( @@ -286,7 +285,6 @@ impl ConnectorClient { request: Some(SinkRequest::Start(StartSink { payload_schema, sink_param: Some(sink_proto), - format: sink_payload_format as i32, })), }, |rx| async move { diff --git a/src/stream/src/from_proto/sink.rs b/src/stream/src/from_proto/sink.rs index 541dce562ba04..5e77be7beb7a0 100644 --- a/src/stream/src/from_proto/sink.rs +++ b/src/stream/src/from_proto/sink.rs @@ -180,7 +180,6 @@ impl ExecutorBuilder for SinkExecutorBuilder { ); let sink_write_param = SinkWriterParam { - connector_params: params.env.connector_params(), executor_id: params.executor_id, vnode_bitmap: params.vnode_bitmap.clone(), meta_client: params.env.meta_client().map(SinkMetaClient::MetaClient), diff --git a/src/stream/src/from_proto/source/fs_fetch.rs b/src/stream/src/from_proto/source/fs_fetch.rs index 6dd2be7263c29..1951365a47eed 100644 --- a/src/stream/src/from_proto/source/fs_fetch.rs +++ b/src/stream/src/from_proto/source/fs_fetch.rs @@ -53,7 +53,6 @@ impl ExecutorBuilder for FsFetchExecutorBuilder { source.row_id_index.map(|x| x as _), source.with_properties.clone(), source_info.clone(), - params.env.connector_params(), params.env.config().developer.connector_message_buffer_size, params.info.pk_indices.clone(), ); diff --git a/src/stream/src/from_proto/source/trad_source.rs b/src/stream/src/from_proto/source/trad_source.rs index 0013792d51326..8c00fb0a50830 100644 --- a/src/stream/src/from_proto/source/trad_source.rs +++ b/src/stream/src/from_proto/source/trad_source.rs @@ -118,7 +118,6 @@ pub fn create_source_desc_builder( row_id_index.map(|x| x as _), with_properties, source_info, - params.env.connector_params(), params.env.config().developer.connector_message_buffer_size, // `pk_indices` is used to ensure that a message will be skipped instead of parsed // with null pk when the pk column is missing. diff --git a/src/stream/src/task/env.rs b/src/stream/src/task/env.rs index 9a0b26f25f0c5..a47eb8279224c 100644 --- a/src/stream/src/task/env.rs +++ b/src/stream/src/task/env.rs @@ -19,7 +19,6 @@ use risingwave_common::config::StreamingConfig; use risingwave_common::system_param::local_manager::LocalSystemParamsManagerRef; use risingwave_common::util::addr::HostAddr; use risingwave_connector::source::monitor::SourceMetrics; -use risingwave_connector::ConnectorParams; use risingwave_dml::dml_manager::DmlManagerRef; use risingwave_rpc_client::MetaClient; use risingwave_storage::StateStoreImpl; @@ -33,9 +32,6 @@ pub struct StreamEnvironment { /// Endpoint the stream manager listens on. server_addr: HostAddr, - /// Parameters used by connector nodes. - connector_params: ConnectorParams, - /// Streaming related configurations. config: Arc, @@ -65,7 +61,6 @@ impl StreamEnvironment { #[allow(clippy::too_many_arguments)] pub fn new( server_addr: HostAddr, - connector_params: ConnectorParams, config: Arc, worker_id: WorkerNodeId, state_store: StateStoreImpl, @@ -76,7 +71,6 @@ impl StreamEnvironment { ) -> Self { StreamEnvironment { server_addr, - connector_params, config, worker_id, state_store, @@ -93,11 +87,9 @@ impl StreamEnvironment { pub fn for_test() -> Self { use risingwave_common::system_param::local_manager::LocalSystemParamsManager; use risingwave_dml::dml_manager::DmlManager; - use risingwave_pb::connector_service::SinkPayloadFormat; use risingwave_storage::monitor::MonitoredStorageMetrics; StreamEnvironment { server_addr: "127.0.0.1:5688".parse().unwrap(), - connector_params: ConnectorParams::new(SinkPayloadFormat::Json), config: Arc::new(StreamingConfig::default()), worker_id: WorkerNodeId::default(), state_store: StateStoreImpl::shared_in_memory_store(Arc::new( @@ -127,10 +119,6 @@ impl StreamEnvironment { self.state_store.clone() } - pub fn connector_params(&self) -> ConnectorParams { - self.connector_params.clone() - } - pub fn dml_manager_ref(&self) -> DmlManagerRef { self.dml_manager.clone() } From c1cc535501117f31e0e3044592170e94fffa11e2 Mon Sep 17 00:00:00 2001 From: Li0k Date: Tue, 14 May 2024 15:06:56 +0800 Subject: [PATCH 09/11] fix(object_store): fix sdk error downcast (#16743) --- src/object_store/src/object/s3.rs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/object_store/src/object/s3.rs b/src/object_store/src/object/s3.rs index 6832b3a8e04da..d7c151e867465 100644 --- a/src/object_store/src/object/s3.rs +++ b/src/object_store/src/object/s3.rs @@ -1006,10 +1006,10 @@ where } => { let sdk_err = inner .as_ref() - .downcast_ref::>>() - .unwrap(); + .downcast_ref::>>(); + let err_should_retry = match sdk_err { - SdkError::DispatchFailure(e) => { + Some(SdkError::DispatchFailure(e)) => { if e.is_timeout() { tracing::warn!(target: "http_timeout_retry", "{e:?} occurs, retry S3 get_object request."); true @@ -1017,7 +1017,8 @@ where false } } - SdkError::ServiceError(e) => { + + Some(SdkError::ServiceError(e)) => { let retry = match e.err().code() { None => { if config.s3.developer.object_store_retry_unknown_service_error @@ -1048,7 +1049,7 @@ where retry } - SdkError::TimeoutError(_err) => true, + Some(SdkError::TimeoutError(_err)) => true, _ => false, }; From 44f990bdefe522e6ec32496dbf843ff75bf47c15 Mon Sep 17 00:00:00 2001 From: August Date: Tue, 14 May 2024 15:30:13 +0800 Subject: [PATCH 10/11] feat: support pg service & pg meta backend in risedev using docker (#16662) --- risedev.yml | 98 +++++++++++++++++++ src/cmd_all/src/single_node.rs | 2 +- src/meta/node/src/lib.rs | 8 +- src/risedevtool/src/bin/risedev-compose.rs | 2 +- src/risedevtool/src/bin/risedev-dev.rs | 15 ++- src/risedevtool/src/config.rs | 1 + src/risedevtool/src/risedev_env.rs | 11 +++ src/risedevtool/src/service_config.rs | 26 +++++ src/risedevtool/src/task.rs | 2 + src/risedevtool/src/task/meta_node_service.rs | 81 +++++++++------ src/risedevtool/src/task/postgres_service.rs | 55 +++++++++++ 11 files changed, 266 insertions(+), 35 deletions(-) create mode 100644 src/risedevtool/src/task/postgres_service.rs diff --git a/risedev.yml b/risedev.yml index 7c57eb1601666..1e3d106736772 100644 --- a/risedev.yml +++ b/risedev.yml @@ -105,6 +105,7 @@ profile: - use: minio - use: etcd - use: meta-node + meta-backend: etcd - use: compute-node - use: frontend - use: compactor @@ -119,6 +120,7 @@ profile: - use: etcd - use: meta-node user-managed: true + meta-backend: etcd - use: compute-node user-managed: true - use: frontend @@ -136,6 +138,7 @@ profile: - use: etcd - use: meta-node user-managed: true + meta-backend: etcd - use: compute-node user-managed: true - use: frontend @@ -149,6 +152,7 @@ profile: - use: etcd - use: meta-node user-managed: true + meta-backend: etcd - use: compute-node user-managed: true - use: frontend @@ -253,6 +257,7 @@ profile: - use: minio - use: etcd - use: meta-node + meta-backend: etcd - use: compute-node - use: frontend - use: compactor @@ -286,6 +291,7 @@ profile: port: 5690 dashboard-port: 5691 exporter-port: 1250 + meta-backend: etcd - use: meta-node port: 15690 dashboard-port: 15691 @@ -333,6 +339,7 @@ profile: port: 5690 dashboard-port: 5691 exporter-port: 1250 + meta-backend: etcd - use: meta-node port: 15690 dashboard-port: 15691 @@ -353,6 +360,7 @@ profile: port: 5690 dashboard-port: 5691 exporter-port: 1250 + meta-backend: sqlite - use: compactor - use: compute-node - use: frontend @@ -366,6 +374,40 @@ profile: port: 5690 dashboard-port: 5691 exporter-port: 1250 + meta-backend: sqlite + - use: compactor + - use: compute-node + - use: frontend + + meta-1cn-1fe-pg-backend: + steps: + - use: minio + - use: postgres + port: 8432 + user: postgres + database: metadata + - use: meta-node + port: 5690 + dashboard-port: 5691 + exporter-port: 1250 + meta-backend: postgres + - use: compactor + - use: compute-node + - use: frontend + + meta-1cn-1fe-pg-backend-with-recovery: + config-path: src/config/ci-recovery.toml + steps: + - use: minio + - use: postgres + port: 8432 + user: postgres + database: metadata + - use: meta-node + port: 5690 + dashboard-port: 5691 + exporter-port: 1250 + meta-backend: postgres - use: compactor - use: compute-node - use: frontend @@ -392,6 +434,7 @@ profile: - use: minio - use: etcd - use: meta-node + meta-backend: etcd - use: compute-node parallelism: 8 - use: frontend @@ -550,6 +593,7 @@ profile: - use: etcd unsafe-no-fsync: true - use: meta-node + meta-backend: etcd - use: compute-node enable-tiered-cache: true - use: frontend @@ -562,6 +606,7 @@ profile: - use: etcd unsafe-no-fsync: true - use: meta-node + meta-backend: etcd - use: compute-node port: 5687 exporter-port: 1222 @@ -584,6 +629,7 @@ profile: - use: etcd unsafe-no-fsync: true - use: meta-node + meta-backend: etcd - use: compute-node port: 5687 exporter-port: 1222 @@ -610,6 +656,7 @@ profile: - use: etcd unsafe-no-fsync: true - use: meta-node + meta-backend: etcd - use: compute-node port: 5687 exporter-port: 1222 @@ -634,6 +681,7 @@ profile: - use: etcd unsafe-no-fsync: true - use: meta-node + meta-backend: etcd - use: compute-node port: 5687 exporter-port: 1222 @@ -658,6 +706,7 @@ profile: - use: etcd unsafe-no-fsync: true - use: meta-node + meta-backend: etcd - use: compute-node port: 5687 exporter-port: 1222 @@ -717,6 +766,7 @@ profile: - use: etcd unsafe-no-fsync: true - use: meta-node + meta-backend: etcd - use: opendal engine: fs bucket: "/tmp/rw_ci" @@ -750,6 +800,7 @@ profile: - use: etcd unsafe-no-fsync: true - use: meta-node + meta-backend: etcd - use: compute-node port: 5687 exporter-port: 1222 @@ -801,6 +852,7 @@ profile: - use: etcd unsafe-no-fsync: true - use: meta-node + meta-backend: etcd - use: compute-node enable-tiered-cache: true - use: frontend @@ -817,6 +869,7 @@ profile: - use: etcd unsafe-no-fsync: true - use: meta-node + meta-backend: etcd - use: compute-node enable-tiered-cache: true - use: frontend @@ -841,6 +894,7 @@ profile: - use: etcd unsafe-no-fsync: true - use: meta-node + meta-backend: etcd - use: compute-node enable-tiered-cache: true - use: frontend @@ -854,6 +908,7 @@ profile: - use: etcd unsafe-no-fsync: true - use: meta-node + meta-backend: etcd - use: compute-node enable-tiered-cache: true total-memory-bytes: 17179869184 @@ -866,6 +921,7 @@ profile: - use: minio - use: etcd - use: meta-node + meta-backend: etcd - use: compute-node enable-tiered-cache: true - use: frontend @@ -878,6 +934,7 @@ profile: - use: etcd unsafe-no-fsync: true - use: meta-node + meta-backend: etcd - use: compute-node port: 5687 exporter-port: 1222 @@ -899,6 +956,7 @@ profile: - use: minio - use: etcd - use: meta-node + meta-backend: etcd - use: compute-node enable-tiered-cache: true - use: frontend @@ -912,6 +970,7 @@ profile: - use: etcd - use: minio - use: meta-node + meta-backend: etcd - use: compute-node - use: frontend - use: compactor @@ -928,6 +987,7 @@ profile: - use: sqlite - use: minio - use: meta-node + meta-backend: sqlite - use: compute-node - use: frontend - use: compactor @@ -978,6 +1038,7 @@ profile: - use: minio - use: etcd - use: meta-node + meta-backend: etcd - use: compute-node - use: frontend - use: compactor @@ -989,6 +1050,7 @@ profile: bucket: renjie-iceberg-bench - use: etcd - use: meta-node + meta-backend: etcd - use: compute-node - use: frontend - use: compactor @@ -1001,6 +1063,7 @@ profile: - use: minio - use: etcd - use: meta-node + meta-backend: etcd - use: compute-node - use: frontend - use: compactor @@ -1161,12 +1224,18 @@ template: # If `user-managed` is true, this service will be started by user with the above config user-managed: false + # meta backend type, requires extra config for provided backend + meta-backend: "memory" + # Etcd backend config provide-etcd-backend: "etcd*" # Sqlite backend config provide-sqlite-backend: "sqlite*" + # Postgres backend config + provide-postgres-backend: "postgres*" + # Prometheus nodes used by dashboard service provide-prometheus: "prometheus*" @@ -1444,3 +1513,32 @@ template: # If `user-managed` is true, user is responsible for starting the service # to serve at the above address and port in any way they see fit. user-managed: false + + # PostgreSQL service backed by docker. + postgres: + # Id to be picked-up by services + id: postgres-${port} + + # address of pg + address: "127.0.0.1" + + # listen port of pg + port: 8432 + + # Note: + # - This will be used to initialize the PostgreSQL instance if it's fresh. + # - In user-managed mode, these configs are not validated by risedev. + # They are passed as-is to risedev-env default user for PostgreSQL operations. + user: postgres + password: "" + database: "postgres" + + # The docker image. Can be overridden to use a different version. + image: "postgres:15-alpine" + + # If set to true, data will be persisted at data/{id}. + persist-data: true + + # If `user-managed` is true, user is responsible for starting the service + # to serve at the above address and port in any way they see fit. + user-managed: false diff --git a/src/cmd_all/src/single_node.rs b/src/cmd_all/src/single_node.rs index 63099c1ff640c..2340841a13c31 100644 --- a/src/cmd_all/src/single_node.rs +++ b/src/cmd_all/src/single_node.rs @@ -186,7 +186,7 @@ pub fn map_single_node_opts_to_standalone_opts(opts: SingleNodeOpts) -> ParsedSt std::fs::create_dir_all(&meta_store_dir).unwrap(); let meta_store_endpoint = format!("sqlite://{}/single_node.db?mode=rwc", &meta_store_dir); - meta_opts.sql_endpoint = Some(meta_store_endpoint); + meta_opts.sql_endpoint = Some(meta_store_endpoint.into()); } } diff --git a/src/meta/node/src/lib.rs b/src/meta/node/src/lib.rs index 140cf481e3c40..c7d8e95d7e49d 100644 --- a/src/meta/node/src/lib.rs +++ b/src/meta/node/src/lib.rs @@ -76,7 +76,7 @@ pub struct MetaNodeOpts { /// Endpoint of the SQL service, make it non-option when SQL service is required. #[clap(long, hide = true, env = "RW_SQL_ENDPOINT")] - pub sql_endpoint: Option, + pub sql_endpoint: Option>, /// The HTTP REST-API address of the Prometheus instance associated to this cluster. /// This address is used to serve `PromQL` queries to Prometheus. @@ -221,7 +221,11 @@ pub fn start(opts: MetaNodeOpts) -> Pin + Send>> { }, MetaBackend::Mem => MetaStoreBackend::Mem, MetaBackend::Sql => MetaStoreBackend::Sql { - endpoint: opts.sql_endpoint.expect("sql endpoint is required"), + endpoint: opts + .sql_endpoint + .expect("sql endpoint is required") + .expose_secret() + .to_string(), }, }; diff --git a/src/risedevtool/src/bin/risedev-compose.rs b/src/risedevtool/src/bin/risedev-compose.rs index 10b29e836c66d..748da07a9d35e 100644 --- a/src/risedevtool/src/bin/risedev-compose.rs +++ b/src/risedevtool/src/bin/risedev-compose.rs @@ -219,7 +219,7 @@ fn main() -> Result<()> { volumes.insert(c.id.clone(), ComposeVolume::default()); (c.address.clone(), c.compose(&compose_config)?) } - ServiceConfig::Redis(_) | ServiceConfig::MySql(_) => { + ServiceConfig::Redis(_) | ServiceConfig::MySql(_) | ServiceConfig::Postgres(_) => { return Err(anyhow!("not supported")) } }; diff --git a/src/risedevtool/src/bin/risedev-dev.rs b/src/risedevtool/src/bin/risedev-dev.rs index f11bc1b3b5148..4f636cb7e30ba 100644 --- a/src/risedevtool/src/bin/risedev-dev.rs +++ b/src/risedevtool/src/bin/risedev-dev.rs @@ -26,8 +26,9 @@ use risedev::util::{complete_spin, fail_spin}; use risedev::{ generate_risedev_env, preflight_check, CompactorService, ComputeNodeService, ConfigExpander, ConfigureTmuxTask, DummyService, EnsureStopService, ExecuteContext, FrontendService, - GrafanaService, KafkaService, MetaNodeService, MinioService, MySqlService, PrometheusService, - PubsubService, RedisService, ServiceConfig, SqliteConfig, Task, TempoService, RISEDEV_NAME, + GrafanaService, KafkaService, MetaNodeService, MinioService, MySqlService, PostgresService, + PrometheusService, PubsubService, RedisService, ServiceConfig, SqliteConfig, Task, + TempoService, RISEDEV_NAME, }; use tempfile::tempdir; use thiserror_ext::AsReport; @@ -314,6 +315,16 @@ fn task_main( ctx.pb .set_message(format!("mysql {}:{}", c.address, c.port)); } + ServiceConfig::Postgres(c) => { + let mut ctx = + ExecuteContext::new(&mut logger, manager.new_progress(), status_dir.clone()); + PostgresService::new(c.clone()).execute(&mut ctx)?; + let mut task = + risedev::ConfigureTcpNodeTask::new(c.address.clone(), c.port, c.user_managed)?; + task.execute(&mut ctx)?; + ctx.pb + .set_message(format!("postgres {}:{}", c.address, c.port)); + } } let service_id = service.id().to_string(); diff --git a/src/risedevtool/src/config.rs b/src/risedevtool/src/config.rs index 541f269ee18c4..839ebc22486ee 100644 --- a/src/risedevtool/src/config.rs +++ b/src/risedevtool/src/config.rs @@ -174,6 +174,7 @@ impl ConfigExpander { "redis" => ServiceConfig::Redis(serde_yaml::from_str(&out_str)?), "redpanda" => ServiceConfig::RedPanda(serde_yaml::from_str(&out_str)?), "mysql" => ServiceConfig::MySql(serde_yaml::from_str(&out_str)?), + "postgres" => ServiceConfig::Postgres(serde_yaml::from_str(&out_str)?), other => return Err(anyhow!("unsupported use type: {}", other)), }; Ok(result) diff --git a/src/risedevtool/src/risedev_env.rs b/src/risedevtool/src/risedev_env.rs index 729279328e93b..b33a65f9986aa 100644 --- a/src/risedevtool/src/risedev_env.rs +++ b/src/risedevtool/src/risedev_env.rs @@ -96,6 +96,17 @@ pub fn generate_risedev_env(services: &Vec) -> String { let port = &c.port; writeln!(env, r#"RISEDEV_PUBSUB_WITH_OPTIONS_COMMON="connector='google_pubsub',pubsub.emulator_host='{address}:{port}'""#,).unwrap(); } + ServiceConfig::Postgres(c) => { + let host = &c.address; + let port = &c.port; + let user = &c.user; + let password = &c.password; + // These envs are used by `postgres` cli. + writeln!(env, r#"PGHOST="{host}""#,).unwrap(); + writeln!(env, r#"PGPORT="{port}""#,).unwrap(); + writeln!(env, r#"PGUSER="{user}""#,).unwrap(); + writeln!(env, r#"PGPASSWORD="{password}""#,).unwrap(); + } _ => {} } } diff --git a/src/risedevtool/src/service_config.rs b/src/risedevtool/src/service_config.rs index 3135d7af4c009..4811ef96f5b87 100644 --- a/src/risedevtool/src/service_config.rs +++ b/src/risedevtool/src/service_config.rs @@ -60,8 +60,10 @@ pub struct MetaNodeConfig { pub user_managed: bool, + pub meta_backend: String, pub provide_etcd_backend: Option>, pub provide_sqlite_backend: Option>, + pub provide_postgres_backend: Option>, pub provide_prometheus: Option>, pub provide_compute_node: Option>, @@ -341,6 +343,26 @@ pub struct MySqlConfig { pub persist_data: bool, } +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case")] +#[serde(deny_unknown_fields)] +pub struct PostgresConfig { + #[serde(rename = "use")] + phantom_use: Option, + pub id: String, + + pub port: u16, + pub address: String, + + pub user: String, + pub password: String, + pub database: String, + + pub image: String, + pub user_managed: bool, + pub persist_data: bool, +} + /// All service configuration #[derive(Clone, Debug, PartialEq)] pub enum ServiceConfig { @@ -361,6 +383,7 @@ pub enum ServiceConfig { Redis(RedisConfig), RedPanda(RedPandaConfig), MySql(MySqlConfig), + Postgres(PostgresConfig), } impl ServiceConfig { @@ -383,6 +406,7 @@ impl ServiceConfig { Self::RedPanda(c) => &c.id, Self::Opendal(c) => &c.id, Self::MySql(c) => &c.id, + ServiceConfig::Postgres(c) => &c.id, } } @@ -405,6 +429,7 @@ impl ServiceConfig { Self::RedPanda(_c) => None, Self::Opendal(_) => None, Self::MySql(c) => Some(c.port), + ServiceConfig::Postgres(c) => Some(c.port), } } @@ -427,6 +452,7 @@ impl ServiceConfig { Self::RedPanda(_c) => false, Self::Opendal(_c) => false, Self::MySql(c) => c.user_managed, + Self::Postgres(c) => c.user_managed, } } } diff --git a/src/risedevtool/src/task.rs b/src/risedevtool/src/task.rs index 24474d14c600f..fbabc6a27d226 100644 --- a/src/risedevtool/src/task.rs +++ b/src/risedevtool/src/task.rs @@ -25,6 +25,7 @@ mod kafka_service; mod meta_node_service; mod minio_service; mod mysql_service; +mod postgres_service; mod prometheus_service; mod pubsub_service; mod redis_service; @@ -62,6 +63,7 @@ pub use self::kafka_service::*; pub use self::meta_node_service::*; pub use self::minio_service::*; pub use self::mysql_service::*; +pub use self::postgres_service::*; pub use self::prometheus_service::*; pub use self::pubsub_service::*; pub use self::redis_service::*; diff --git a/src/risedevtool/src/task/meta_node_service.rs b/src/risedevtool/src/task/meta_node_service.rs index 42d555f48b3d8..a1bbd2c04e6ca 100644 --- a/src/risedevtool/src/task/meta_node_service.rs +++ b/src/risedevtool/src/task/meta_node_service.rs @@ -77,35 +77,58 @@ impl MetaNodeService { let mut is_persistent_meta_store = false; - if let Some(sqlite_config) = &config.provide_sqlite_backend - && !sqlite_config.is_empty() - { - is_persistent_meta_store = true; - let prefix_data = env::var("PREFIX_DATA")?; - let file_path = PathBuf::from(&prefix_data) - .join(&sqlite_config[0].id) - .join(&sqlite_config[0].file); - cmd.arg("--backend") - .arg("sql") - .arg("--sql-endpoint") - .arg(format!("sqlite://{}?mode=rwc", file_path.display())); - } else { - match config.provide_etcd_backend.as_ref().unwrap().as_slice() { - [] => { - cmd.arg("--backend").arg("mem"); - } - etcds => { - is_persistent_meta_store = true; - cmd.arg("--backend") - .arg("etcd") - .arg("--etcd-endpoints") - .arg( - etcds - .iter() - .map(|etcd| format!("{}:{}", etcd.address, etcd.port)) - .join(","), - ); - } + match config.meta_backend.to_ascii_lowercase().as_str() { + "memory" => { + cmd.arg("--backend").arg("mem"); + } + "etcd" => { + let etcd_config = config.provide_etcd_backend.as_ref().unwrap(); + assert!(!etcd_config.is_empty()); + is_persistent_meta_store = true; + + cmd.arg("--backend") + .arg("etcd") + .arg("--etcd-endpoints") + .arg( + etcd_config + .iter() + .map(|etcd| format!("{}:{}", etcd.address, etcd.port)) + .join(","), + ); + } + "sqlite" => { + let sqlite_config = config.provide_sqlite_backend.as_ref().unwrap(); + assert_eq!(sqlite_config.len(), 1); + is_persistent_meta_store = true; + + let prefix_data = env::var("PREFIX_DATA")?; + let file_path = PathBuf::from(&prefix_data) + .join(&sqlite_config[0].id) + .join(&sqlite_config[0].file); + cmd.arg("--backend") + .arg("sql") + .arg("--sql-endpoint") + .arg(format!("sqlite://{}?mode=rwc", file_path.display())); + } + "postgres" => { + let pg_config = config.provide_postgres_backend.as_ref().unwrap(); + assert_eq!(pg_config.len(), 1); + is_persistent_meta_store = true; + + cmd.arg("--backend") + .arg("sql") + .arg("--sql-endpoint") + .arg(format!( + "postgres://{}:{}@{}:{}/{}", + pg_config[0].user, + pg_config[0].password, + pg_config[0].address, + pg_config[0].port, + pg_config[0].database + )); + } + backend => { + return Err(anyhow!("unsupported meta backend {}", backend)); } } diff --git a/src/risedevtool/src/task/postgres_service.rs b/src/risedevtool/src/task/postgres_service.rs new file mode 100644 index 0000000000000..4a6d6571c82cc --- /dev/null +++ b/src/risedevtool/src/task/postgres_service.rs @@ -0,0 +1,55 @@ +// Copyright 2024 RisingWave Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use crate::task::docker_service::{DockerService, DockerServiceConfig}; +use crate::PostgresConfig; + +impl DockerServiceConfig for PostgresConfig { + fn id(&self) -> String { + self.id.clone() + } + + fn is_user_managed(&self) -> bool { + self.user_managed + } + + fn image(&self) -> String { + self.image.clone() + } + + fn envs(&self) -> Vec<(String, String)> { + vec![ + ("POSTGRES_HOST_AUTH_METHOD".to_owned(), "trust".to_owned()), + ("POSTGRES_USER".to_owned(), self.user.clone()), + ("POSTGRES_PASSWORD".to_owned(), self.password.clone()), + ("POSTGRES_DB".to_owned(), self.database.clone()), + ( + "POSTGRES_INITDB_ARGS".to_owned(), + "--encoding=UTF-8 --lc-collate=C --lc-ctype=C".to_owned(), + ), + ] + } + + fn ports(&self) -> Vec<(String, String)> { + vec![(format!("{}:{}", self.address, self.port), "5432".to_owned())] + } + + fn data_path(&self) -> Option { + self.persist_data + .then(|| "/var/lib/postgresql/data".to_owned()) + } +} + +/// Docker-backed PostgreSQL service. +pub type PostgresService = DockerService; From 0971e55de2e7e81c464b27652382a48a23b627c6 Mon Sep 17 00:00:00 2001 From: August Date: Tue, 14 May 2024 17:21:09 +0800 Subject: [PATCH 11/11] fix: avoid panic when auto scaling hit the maximum number of bind parameters in sql backend (#16750) --- src/meta/src/controller/streaming_job.rs | 26 +++++++++--------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/src/meta/src/controller/streaming_job.rs b/src/meta/src/controller/streaming_job.rs index c501c14252ffe..db9c1d03eab4b 100644 --- a/src/meta/src/controller/streaming_job.rs +++ b/src/meta/src/controller/streaming_job.rs @@ -1253,10 +1253,6 @@ impl CatalogController { .exec(&txn) .await?; - // newly created actor - let mut new_actors = vec![]; - let mut new_actor_dispatchers = vec![]; - for ( PbStreamActor { actor_id, @@ -1276,6 +1272,7 @@ impl CatalogController { ) in newly_created_actors { let mut actor_upstreams = BTreeMap::>::new(); + let mut new_actor_dispatchers = vec![]; if let Some(nodes) = &mut nodes { visit_stream_node(nodes, |node| { @@ -1314,7 +1311,7 @@ impl CatalogController { .get(&actor_id) .map(|splits| splits.iter().map(PbConnectorSplit::from).collect_vec()); - new_actors.push(actor::ActiveModel { + Actor::insert(actor::ActiveModel { actor_id: Set(actor_id as _), fragment_id: Set(fragment_id as _), status: Set(ActorStatus::Running), @@ -1324,7 +1321,9 @@ impl CatalogController { upstream_actor_ids: Set(actor_upstreams), vnode_bitmap: Set(vnode_bitmap.as_ref().map(|bitmap| bitmap.into())), expr_context: Set(expr_context.as_ref().unwrap().into()), - }); + }) + .exec(&txn) + .await?; for PbDispatcher { r#type: dispatcher_type, @@ -1348,16 +1347,11 @@ impl CatalogController { downstream_actor_ids: Set(downstream_actor_id.into()), }) } - } - - if !new_actors.is_empty() { - Actor::insert_many(new_actors).exec(&txn).await?; - } - - if !new_actor_dispatchers.is_empty() { - ActorDispatcher::insert_many(new_actor_dispatchers) - .exec(&txn) - .await?; + if !new_actor_dispatchers.is_empty() { + ActorDispatcher::insert_many(new_actor_dispatchers) + .exec(&txn) + .await?; + } } // actor update