From 5f66a96da17a13656075c57277be8dc28eeae22f Mon Sep 17 00:00:00 2001
From: Patrick Huang
Date: Tue, 26 Sep 2023 19:46:24 +0800
Subject: [PATCH 1/4] fix: allow configure message.timeout.ms and max.in.flight
 for kafka sink

---
 .../kafka-cdc-sink/risingwave.sql |  1 -
 src/connector/src/sink/kafka.rs   | 70 +++++++++++--------
 2 files changed, 40 insertions(+), 31 deletions(-)

diff --git a/integration_tests/kafka-cdc-sink/risingwave.sql b/integration_tests/kafka-cdc-sink/risingwave.sql
index 588731a33940d..fabddc9229e2d 100644
--- a/integration_tests/kafka-cdc-sink/risingwave.sql
+++ b/integration_tests/kafka-cdc-sink/risingwave.sql
@@ -22,6 +22,5 @@ connector = 'kafka',
 properties.bootstrap.server='message_queue:29092',
 topic = 'counts',
 type = 'debezium',
-use_transaction = 'false',
 primary_key = 'id'
 );
\ No newline at end of file
diff --git a/src/connector/src/sink/kafka.rs b/src/connector/src/sink/kafka.rs
index a5a524048bfd1..8e01ef6d765dd 100644
--- a/src/connector/src/sink/kafka.rs
+++ b/src/connector/src/sink/kafka.rs
@@ -50,10 +50,6 @@ use crate::{
 
 pub const KAFKA_SINK: &str = "kafka";
 
-const fn _default_timeout() -> Duration {
-    Duration::from_secs(5)
-}
-
 const fn _default_max_retries() -> u32 {
     3
 }
@@ -62,12 +58,16 @@ const fn _default_retry_backoff() -> Duration {
     Duration::from_millis(100)
 }
 
-const fn _default_use_transaction() -> bool {
+const fn _default_force_append_only() -> bool {
     false
 }
 
-const fn _default_force_append_only() -> bool {
-    false
+const fn _default_message_timeout_ms() -> usize {
+    5000
+}
+
+const fn _default_max_in_flight_requests_per_connection() -> uszie {
+    5
 }
 
 #[derive(Debug, Clone, PartialEq, Display, Serialize, Deserialize, EnumString)]
@@ -80,6 +80,8 @@ enum CompressionCodec {
     Zstd,
 }
 
+/// See https://github.com/confluentinc/librdkafka/blob/master/CONFIGURATION.md
+/// for the detailed meaning of these librdkafka producer properties
 #[serde_as]
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct RdKafkaPropertiesProducer {
@@ -140,6 +142,24 @@ pub struct RdKafkaPropertiesProducer {
     #[serde(rename = "properties.compression.codec")]
     #[serde_as(as = "Option<DisplayFromStr>")]
     compression_codec: Option<CompressionCodec>,
+
+    /// Produce message timeout.
+    /// This value is used to limit the time a produced message waits for
+    /// successful delivery (including retries).
+    #[serde(
+        rename = "properties.message.timeout.ms",
+        default = "_default_message_timeout_ms"
+    )]
+    #[serde_as(as = "Option<DisplayFromStr>")]
+    message_timeout_ms: usize,
+
+    /// The maximum number of unacknowledged requests the client will send on a single connection before blocking.
+    #[serde(
+        rename = "properties.max.in.flight.requests.per.connection",
+        default = "_default_max_in_flight_requests_per_connection"
+    )]
+    #[serde_as(as = "Option<DisplayFromStr>")]
+    max_in_flight_requests_per_connection: usize,
 }
 
 impl RdKafkaPropertiesProducer {
@@ -171,6 +191,11 @@ impl RdKafkaPropertiesProducer {
         if let Some(v) = &self.compression_codec {
             c.set("compression.codec", v.to_string());
         }
+        c.set("message.timeout.ms", self.message_timeout_ms.to_string());
+        c.set(
+            "max.in.flight.requests.per.connection",
+            self.max_in_flight_requests_per_connection.to_string(),
+        );
     }
 }
 
@@ -193,13 +218,6 @@ pub struct KafkaConfig {
     )]
     pub force_append_only: bool,
 
-    #[serde(
-        rename = "properties.timeout",
-        default = "_default_timeout",
-        deserialize_with = "deserialize_duration_from_string"
-    )]
-    pub timeout: Duration,
-
     #[serde(
         rename = "properties.retry.max",
         default = "_default_max_retries",
@@ -214,12 +232,6 @@ pub struct KafkaConfig {
     )]
     pub retry_interval: Duration,
 
-    #[serde(
-        default = "_default_use_transaction",
-        deserialize_with = "deserialize_bool_from_string"
-    )]
-    pub use_transaction: bool,
-
     /// We have parsed the primary key for an upsert kafka sink into a `usize` vector representing
     /// the indices of the pk columns in the frontend, so we simply store the primary key here
     /// as a string.
@@ -372,10 +384,7 @@ impl KafkaSinkWriter {
         config.set_client(&mut c);
 
         // ClientConfig configuration
-        c.set("bootstrap.servers", &config.common.brokers)
-            .set("message.timeout.ms", "5000");
-        // Note that we will not use transaction during sinking, thus set it to false
-        config.use_transaction = false;
+        c.set("bootstrap.servers", &config.common.brokers);
 
         // Create the producer context, will be used to create the producer
         let producer_ctx = PrivateLinkProducerContext::new(
@@ -598,6 +607,8 @@ mod test {
             "properties.batch.num.messages".to_string() => "114514".to_string(),
             "properties.batch.size".to_string() => "114514".to_string(),
             "properties.compression.codec".to_string() => "zstd".to_string(),
+            "properties.message.timeout.ms".to_string() => "114514".to_string(),
+            "properties.max.in.flight.requests.per.connection".to_string() => "114514".to_string(),
         };
         let c = KafkaConfig::from_hashmap(props).unwrap();
         assert_eq!(
@@ -608,6 +619,11 @@ mod test {
             c.rdkafka_properties.compression_codec,
             Some(CompressionCodec::Zstd)
         );
+        assert_eq!(c.rdkafka_properties.message_timeout_ms, 114514);
+        assert_eq!(
+            c.rdkafka_properties.max_in_flight_requests_per_connection,
+            114514
+        );
 
         let props: HashMap<String, String> = hashmap! {
             // basic
@@ -649,12 +665,10 @@ mod test {
             "topic".to_string() => "test".to_string(),
             "type".to_string() => "append-only".to_string(),
             "force_append_only".to_string() => "true".to_string(),
-            "use_transaction".to_string() => "False".to_string(),
             "properties.security.protocol".to_string() => "SASL".to_string(),
             "properties.sasl.mechanism".to_string() => "SASL".to_string(),
             "properties.sasl.username".to_string() => "test".to_string(),
             "properties.sasl.password".to_string() => "test".to_string(),
-            "properties.timeout".to_string() => "10s".to_string(),
             "properties.retry.max".to_string() => "20".to_string(),
             "properties.retry.interval".to_string() => "500ms".to_string(),
         };
@@ -669,8 +683,6 @@ mod test {
         assert_eq!(config.common.topic, "test");
         assert_eq!(config.r#type, "append-only");
         assert!(config.force_append_only);
-        assert!(!config.use_transaction);
-        assert_eq!(config.timeout, Duration::from_secs(10));
         assert_eq!(config.max_retry_num, 20);
         assert_eq!(config.retry_interval, Duration::from_millis(500));
 
@@ -684,12 +696,10 @@ mod test {
             "connector".to_string() => "kafka".to_string(),
             "properties.bootstrap.server".to_string() => "localhost:9092".to_string(),
             "topic".to_string() => "test".to_string(),
             "type".to_string() => "append-only".to_string(),
         };
         let config = KafkaConfig::from_hashmap(properties).unwrap();
         assert!(!config.force_append_only);
-        assert!(!config.use_transaction);
-        assert_eq!(config.timeout, Duration::from_secs(5));
         assert_eq!(config.max_retry_num, 3);
         assert_eq!(config.retry_interval, Duration::from_millis(100));
 

From 60303997f9aff656e55cdaae16a7b4484bd0cf64 Mon Sep 17 00:00:00 2001
From: Patrick Huang
Date: Thu, 28 Sep 2023 12:50:12 +0800
Subject: [PATCH 2/4] fix typo

---
 src/connector/src/sink/kafka.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/connector/src/sink/kafka.rs b/src/connector/src/sink/kafka.rs
index 8e01ef6d765dd..7412870cb1216 100644
--- a/src/connector/src/sink/kafka.rs
+++ b/src/connector/src/sink/kafka.rs
@@ -66,7 +66,7 @@ const fn _default_message_timeout_ms() -> usize {
     5000
 }
 
-const fn _default_max_in_flight_requests_per_connection() -> uszie {
+const fn _default_max_in_flight_requests_per_connection() -> usize {
     5
 }
 

From ecb0383b6b68911c4742a848f3bf6f464fa00653 Mon Sep 17 00:00:00 2001
From: Patrick Huang
Date: Thu, 28 Sep 2023 18:02:36 +0800
Subject: [PATCH 3/4] fix check

---
 src/connector/src/sink/kafka.rs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/connector/src/sink/kafka.rs b/src/connector/src/sink/kafka.rs
index 7412870cb1216..e874500e96519 100644
--- a/src/connector/src/sink/kafka.rs
+++ b/src/connector/src/sink/kafka.rs
@@ -150,7 +150,7 @@ pub struct RdKafkaPropertiesProducer {
         rename = "properties.message.timeout.ms",
         default = "_default_message_timeout_ms"
     )]
-    #[serde_as(as = "Option<DisplayFromStr>")]
+    #[serde_as(as = "DisplayFromStr")]
     message_timeout_ms: usize,
 
     /// The maximum number of unacknowledged requests the client will send on a single connection before blocking.
@@ -158,7 +158,7 @@ pub struct RdKafkaPropertiesProducer {
     #[serde(
         rename = "properties.max.in.flight.requests.per.connection",
         default = "_default_max_in_flight_requests_per_connection"
    )]
-    #[serde_as(as = "Option<DisplayFromStr>")]
+    #[serde_as(as = "DisplayFromStr")]
     max_in_flight_requests_per_connection: usize,
 }
@@ -375,7 +375,7 @@ pub struct KafkaSinkWriter {
 }
 
 impl KafkaSinkWriter {
-    pub async fn new(mut config: KafkaConfig, formatter: SinkFormatterImpl) -> Result<Self> {
+    pub async fn new(config: KafkaConfig, formatter: SinkFormatterImpl) -> Result<Self> {
         let inner: FutureProducer<PrivateLinkProducerContext> = {
             let mut c = ClientConfig::new();
 

From cd8a5a809290c52ca4a99108019f097c0eae8844 Mon Sep 17 00:00:00 2001
From: Patrick Huang
Date: Thu, 28 Sep 2023 18:28:32 +0800
Subject: [PATCH 4/4] fix link

---
 src/connector/src/sink/kafka.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/connector/src/sink/kafka.rs b/src/connector/src/sink/kafka.rs
index e874500e96519..498850091991c 100644
--- a/src/connector/src/sink/kafka.rs
+++ b/src/connector/src/sink/kafka.rs
@@ -80,7 +80,7 @@ enum CompressionCodec {
     Zstd,
 }
 
-/// See https://github.com/confluentinc/librdkafka/blob/master/CONFIGURATION.md
+/// See <https://github.com/confluentinc/librdkafka/blob/master/CONFIGURATION.md>
 /// for the detailed meaning of these librdkafka producer properties
 #[serde_as]
 #[derive(Debug, Clone, Serialize, Deserialize)]
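
Editor's usage sketch (not part of the patch series): after these four patches, the two librdkafka settings are ordinary Kafka sink properties. They are picked up through the `properties.`-prefixed serde renames on RdKafkaPropertiesProducer, parsed with DisplayFromStr (so the values are passed as quoted strings), forwarded verbatim to the producer in set_client, and default to 5000 and 5 via the _default_* functions in patch 1 when omitted. A CREATE SINK using them might look like the following; the sink name and upstream relation are hypothetical, while the two property keys match the serde renames and the test hashmap above:

    CREATE SINK counts_kafka_sink FROM counts WITH (
        connector = 'kafka',
        properties.bootstrap.server = 'message_queue:29092',
        topic = 'counts',
        type = 'debezium',
        primary_key = 'id',
        -- passed through to the librdkafka producer as
        -- message.timeout.ms and max.in.flight.requests.per.connection
        properties.message.timeout.ms = '7000',
        properties.max.in.flight.requests.per.connection = '10'
    );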