From 0ede2ea26bcbe946ac6a30f21311177ec4ff3cf6 Mon Sep 17 00:00:00 2001 From: Noel Kwan Date: Tue, 5 Mar 2024 16:22:31 +0800 Subject: [PATCH 1/7] trace error --- src/expr/udf/src/external.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/expr/udf/src/external.rs b/src/expr/udf/src/external.rs index ae72a7bb5551d..0353d7d259bda 100644 --- a/src/expr/udf/src/external.rs +++ b/src/expr/udf/src/external.rs @@ -228,7 +228,12 @@ impl ArrowFlightUdfClient { Err(err) if err.is_connection_error() => { tracing::error!(error = %err.as_report(), "UDF connection error. retry..."); } - ret => return ret, + ret => { + if ret.is_err() { + tracing::error!(error = %ret.as_ref().unwrap_err().as_report(), "UDF error. exiting..."); + } + return ret; + } } tokio::time::sleep(backoff).await; backoff *= 2; From 25920a4f97a3a672f6c003e33d88b710d20b0bc4 Mon Sep 17 00:00:00 2001 From: Noel Kwan Date: Tue, 5 Mar 2024 16:28:56 +0800 Subject: [PATCH 2/7] skip single node --- ci/scripts/run-e2e-test.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ci/scripts/run-e2e-test.sh b/ci/scripts/run-e2e-test.sh index 24eaa3881c123..c25b89ce54643 100755 --- a/ci/scripts/run-e2e-test.sh +++ b/ci/scripts/run-e2e-test.sh @@ -88,7 +88,9 @@ echo "--- e2e, $mode, batch" RUST_LOG="info,risingwave_stream=info,risingwave_batch=info,risingwave_storage=info" \ cluster_start sqllogictest -p 4566 -d dev './e2e_test/ddl/**/*.slt' --junit "batch-ddl-${profile}" -sqllogictest -p 4566 -d dev './e2e_test/background_ddl/basic.slt' --junit "batch-ddl-${profile}" +if [[ $mode != "single-node" ]]; then + sqllogictest -p 4566 -d dev './e2e_test/background_ddl/basic.slt' --junit "batch-ddl-${profile}" +fi sqllogictest -p 4566 -d dev './e2e_test/visibility_mode/*.slt' --junit "batch-${profile}" sqllogictest -p 4566 -d dev './e2e_test/ttl/ttl.slt' sqllogictest -p 4566 -d dev './e2e_test/database/prepare.slt' From 96d1f1e29f64bc289caf0e7f3e439bef7bbe94f2 Mon Sep 17 00:00:00 2001 From: Noel Kwan Date: Wed, 6 Mar 2024 15:27:36 +0800 Subject: [PATCH 3/7] handle errors --- src/expr/udf/src/error.rs | 7 +++++++ src/expr/udf/src/external.rs | 4 ++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/expr/udf/src/error.rs b/src/expr/udf/src/error.rs index 6fd83a6171340..4b6f67d5e691c 100644 --- a/src/expr/udf/src/error.rs +++ b/src/expr/udf/src/error.rs @@ -58,6 +58,13 @@ impl Error { _ => false, } } + + pub fn is_tonic_error(&self) -> bool { + match self.inner() { + ErrorInner::Tonic(_) | ErrorInner::Connect(_) => true, + _ => false, + } + } } static_assertions::const_assert_eq!(std::mem::size_of::(), 8); diff --git a/src/expr/udf/src/external.rs b/src/expr/udf/src/external.rs index 0353d7d259bda..8b40c6d6ba69b 100644 --- a/src/expr/udf/src/external.rs +++ b/src/expr/udf/src/external.rs @@ -225,8 +225,8 @@ impl ArrowFlightUdfClient { let mut backoff = Duration::from_millis(100); loop { match self.call(id, input.clone(), fragment_id).await { - Err(err) if err.is_connection_error() => { - tracing::error!(error = %err.as_report(), "UDF connection error. retry..."); + Err(err) if err.is_tonic_error() => { + tracing::error!(error = %err.as_report(), "UDF tonic error. retry..."); } ret => { if ret.is_err() { From da7f3f545942c34df46a965b1ef191016240c1e1 Mon Sep 17 00:00:00 2001 From: Noel Kwan Date: Wed, 6 Mar 2024 15:35:04 +0800 Subject: [PATCH 4/7] fix --- src/expr/udf/src/error.rs | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/src/expr/udf/src/error.rs b/src/expr/udf/src/error.rs index 4b6f67d5e691c..17306f172970d 100644 --- a/src/expr/udf/src/error.rs +++ b/src/expr/udf/src/error.rs @@ -23,9 +23,6 @@ pub type Result = std::result::Result; #[derive(Error, Debug, Box, Construct)] #[thiserror_ext(newtype(name = Error))] pub enum ErrorInner { - #[error("failed to connect to UDF service: {0}")] - Connect(#[from] tonic::transport::Error), - #[error("failed to send requests to UDF service: {0}")] Tonic(#[from] tonic::Status), @@ -60,10 +57,7 @@ impl Error { } pub fn is_tonic_error(&self) -> bool { - match self.inner() { - ErrorInner::Tonic(_) | ErrorInner::Connect(_) => true, - _ => false, - } + matches!(self.inner(), ErrorInner::Tonic(_)) } } From 10c0aa68b5085b997639cb3b0e3103a58badf712 Mon Sep 17 00:00:00 2001 From: Noel Kwan Date: Wed, 6 Mar 2024 16:30:33 +0800 Subject: [PATCH 5/7] increase main-cron timeout --- ci/workflows/main-cron.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/workflows/main-cron.yml b/ci/workflows/main-cron.yml index 51f55d60d21de..23193e0cfd3ee 100644 --- a/ci/workflows/main-cron.yml +++ b/ci/workflows/main-cron.yml @@ -87,7 +87,7 @@ steps: config: ci/docker-compose.yml mount-buildkite-agent: true - ./ci/plugins/upload-failure-logs - timeout_in_minutes: 60 + timeout_in_minutes: 65 retry: *auto-retry - label: "end-to-end test (parallel) (release)" From 1dc1a9b3f2da21b6c4d11d8e1704356f26e42b92 Mon Sep 17 00:00:00 2001 From: Noel Kwan Date: Wed, 6 Mar 2024 16:59:32 +0800 Subject: [PATCH 6/7] Revert "increase main-cron timeout" This reverts commit 10c0aa68b5085b997639cb3b0e3103a58badf712. --- ci/workflows/main-cron.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/workflows/main-cron.yml b/ci/workflows/main-cron.yml index 23193e0cfd3ee..51f55d60d21de 100644 --- a/ci/workflows/main-cron.yml +++ b/ci/workflows/main-cron.yml @@ -87,7 +87,7 @@ steps: config: ci/docker-compose.yml mount-buildkite-agent: true - ./ci/plugins/upload-failure-logs - timeout_in_minutes: 65 + timeout_in_minutes: 60 retry: *auto-retry - label: "end-to-end test (parallel) (release)" From c69e06b4fcc19e74548a92f08a0c3e0661cb940b Mon Sep 17 00:00:00 2001 From: Noel Kwan Date: Wed, 6 Mar 2024 16:59:50 +0800 Subject: [PATCH 7/7] Revert "skip single node" This reverts commit 25920a4f97a3a672f6c003e33d88b710d20b0bc4. --- ci/scripts/run-e2e-test.sh | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/ci/scripts/run-e2e-test.sh b/ci/scripts/run-e2e-test.sh index c25b89ce54643..24eaa3881c123 100755 --- a/ci/scripts/run-e2e-test.sh +++ b/ci/scripts/run-e2e-test.sh @@ -88,9 +88,7 @@ echo "--- e2e, $mode, batch" RUST_LOG="info,risingwave_stream=info,risingwave_batch=info,risingwave_storage=info" \ cluster_start sqllogictest -p 4566 -d dev './e2e_test/ddl/**/*.slt' --junit "batch-ddl-${profile}" -if [[ $mode != "single-node" ]]; then - sqllogictest -p 4566 -d dev './e2e_test/background_ddl/basic.slt' --junit "batch-ddl-${profile}" -fi +sqllogictest -p 4566 -d dev './e2e_test/background_ddl/basic.slt' --junit "batch-ddl-${profile}" sqllogictest -p 4566 -d dev './e2e_test/visibility_mode/*.slt' --junit "batch-${profile}" sqllogictest -p 4566 -d dev './e2e_test/ttl/ttl.slt' sqllogictest -p 4566 -d dev './e2e_test/database/prepare.slt'