From 3bfc399550fad5d8d1dd9402a8cd2976563052bd Mon Sep 17 00:00:00 2001 From: luohaha <18810541851@163.com> Date: Fri, 17 Jan 2025 16:25:24 +0800 Subject: [PATCH 1/4] [BugFix] fix ingestion hang because of alter job timeout Signed-off-by: luohaha <18810541851@163.com> --- .../src/main/java/com/starrocks/alter/AlterJobV2.java | 8 ++++++-- .../java/com/starrocks/alter/SchemaChangeHandler.java | 2 +- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/fe/fe-core/src/main/java/com/starrocks/alter/AlterJobV2.java b/fe/fe-core/src/main/java/com/starrocks/alter/AlterJobV2.java index e10426c0ad589..96318342448f5 100644 --- a/fe/fe-core/src/main/java/com/starrocks/alter/AlterJobV2.java +++ b/fe/fe-core/src/main/java/com/starrocks/alter/AlterJobV2.java @@ -223,8 +223,12 @@ public long getWarehouseId() { */ public synchronized void run() { if (isTimeout()) { - cancelHook(cancelImpl("Timeout")); - return; + bool cancelled = cancelImpl("Timeout"); + cancelHook(cancelled); + if (cancelled) { + // If this job can't be cancelled, we should execute it. + return; + } } // create connectcontext diff --git a/fe/fe-core/src/main/java/com/starrocks/alter/SchemaChangeHandler.java b/fe/fe-core/src/main/java/com/starrocks/alter/SchemaChangeHandler.java index 2ff33e541d59c..0e62624e58f75 100644 --- a/fe/fe-core/src/main/java/com/starrocks/alter/SchemaChangeHandler.java +++ b/fe/fe-core/src/main/java/com/starrocks/alter/SchemaChangeHandler.java @@ -2110,7 +2110,7 @@ public AlterJobV2 createAlterMetaJob(AlterClause alterClause, Database db, OlapT long timeoutSecond = PropertyAnalyzer.analyzeTimeout(properties, Config.alter_table_timeout_second); alterMetaJob = new LakeTableAlterMetaJob(GlobalStateMgr.getCurrentState().getNextId(), db.getId(), - olapTable.getId(), olapTable.getName(), timeoutSecond, + olapTable.getId(), olapTable.getName(), timeoutSecond * 1000 /* should be ms*/, TTabletMetaType.ENABLE_PERSISTENT_INDEX, enablePersistentIndex, persistentIndexType); } else { // shouldn't happen From eefc701a9a262e95db16a08fbaf4c553d4b400fc Mon Sep 17 00:00:00 2001 From: luohaha <18810541851@163.com> Date: Fri, 17 Jan 2025 16:38:23 +0800 Subject: [PATCH 2/4] update Signed-off-by: luohaha <18810541851@163.com> --- fe/fe-core/src/main/java/com/starrocks/alter/AlterJobV2.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fe/fe-core/src/main/java/com/starrocks/alter/AlterJobV2.java b/fe/fe-core/src/main/java/com/starrocks/alter/AlterJobV2.java index 96318342448f5..834e5f0890c6d 100644 --- a/fe/fe-core/src/main/java/com/starrocks/alter/AlterJobV2.java +++ b/fe/fe-core/src/main/java/com/starrocks/alter/AlterJobV2.java @@ -223,7 +223,7 @@ public long getWarehouseId() { */ public synchronized void run() { if (isTimeout()) { - bool cancelled = cancelImpl("Timeout"); + boolean cancelled = cancelImpl("Timeout"); cancelHook(cancelled); if (cancelled) { // If this job can't be cancelled, we should execute it. From 63d37998a2f2a9cd3f77f698b5cba36aa368cf67 Mon Sep 17 00:00:00 2001 From: luohaha <18810541851@163.com> Date: Fri, 17 Jan 2025 22:43:36 +0800 Subject: [PATCH 3/4] fix comment Signed-off-by: luohaha <18810541851@163.com> --- .../java/com/starrocks/alter/AlterJobV2.java | 16 +++++++++------- .../alter/LakeTableSchemaChangeJob.java | 4 +--- .../java/com/starrocks/alter/RollupJobV2.java | 4 +--- .../com/starrocks/alter/SchemaChangeJobV2.java | 4 +--- 4 files changed, 12 insertions(+), 16 deletions(-) diff --git a/fe/fe-core/src/main/java/com/starrocks/alter/AlterJobV2.java b/fe/fe-core/src/main/java/com/starrocks/alter/AlterJobV2.java index 834e5f0890c6d..1b238e9c2202a 100644 --- a/fe/fe-core/src/main/java/com/starrocks/alter/AlterJobV2.java +++ b/fe/fe-core/src/main/java/com/starrocks/alter/AlterJobV2.java @@ -223,9 +223,7 @@ public long getWarehouseId() { */ public synchronized void run() { if (isTimeout()) { - boolean cancelled = cancelImpl("Timeout"); - cancelHook(cancelled); - if (cancelled) { + if (cancelInternal("Timeout")) { // If this job can't be cancelled, we should execute it. return; } @@ -259,15 +257,19 @@ public synchronized void run() { } // else: handle the new state } } catch (AlterCancelException e) { - cancelHook(cancelImpl(e.getMessage())); + cancelInternal(e.getMessage()); } } + protected boolean cancelInternal(String errMsg) { + boolean cancelled = cancelImpl(errMsg); + cancelHook(cancelled); + return cancelled; + } + public boolean cancel(String errMsg) { synchronized (this) { - boolean cancelled = cancelImpl(errMsg); - cancelHook(cancelled); - return cancelled; + return cancelInternal(errMsg); } } diff --git a/fe/fe-core/src/main/java/com/starrocks/alter/LakeTableSchemaChangeJob.java b/fe/fe-core/src/main/java/com/starrocks/alter/LakeTableSchemaChangeJob.java index 21a6e5e9c0afc..0cef06b5d7b61 100644 --- a/fe/fe-core/src/main/java/com/starrocks/alter/LakeTableSchemaChangeJob.java +++ b/fe/fe-core/src/main/java/com/starrocks/alter/LakeTableSchemaChangeJob.java @@ -1080,9 +1080,7 @@ public final boolean cancel(String errMsg) { createReplicaLatch.countDownToZero(new Status(TStatusCode.OK, "")); } synchronized (this) { - boolean cancelled = cancelImpl(errMsg); - cancelHook(cancelled); - return cancelled; + return cancelInternal(errMsg); } } finally { isCancelling.set(false); diff --git a/fe/fe-core/src/main/java/com/starrocks/alter/RollupJobV2.java b/fe/fe-core/src/main/java/com/starrocks/alter/RollupJobV2.java index d9538eb4fe84b..54939589534e0 100644 --- a/fe/fe-core/src/main/java/com/starrocks/alter/RollupJobV2.java +++ b/fe/fe-core/src/main/java/com/starrocks/alter/RollupJobV2.java @@ -770,9 +770,7 @@ public final boolean cancel(String errMsg) { createReplicaLatch.countDownToZero(new Status(TStatusCode.OK, "")); } synchronized (this) { - boolean cancelled = cancelImpl(errMsg); - cancelHook(cancelled); - return cancelled; + return cancelInternal(errMsg) } } finally { isCancelling.set(false); diff --git a/fe/fe-core/src/main/java/com/starrocks/alter/SchemaChangeJobV2.java b/fe/fe-core/src/main/java/com/starrocks/alter/SchemaChangeJobV2.java index 19c32b871839e..5545a0ef9ed5a 100644 --- a/fe/fe-core/src/main/java/com/starrocks/alter/SchemaChangeJobV2.java +++ b/fe/fe-core/src/main/java/com/starrocks/alter/SchemaChangeJobV2.java @@ -946,9 +946,7 @@ public final boolean cancel(String errMsg) { createReplicaLatch.countDownToZero(new Status(TStatusCode.OK, "")); } synchronized (this) { - boolean cancelled = cancelImpl(errMsg); - cancelHook(cancelled); - return cancelled; + return cancelInternal(errMsg); } } finally { isCancelling.set(false); From 87f53ac569fd1643a7dcd9b56e63c4f246dbc8b9 Mon Sep 17 00:00:00 2001 From: luohaha <18810541851@163.com> Date: Sat, 18 Jan 2025 00:52:05 +0800 Subject: [PATCH 4/4] update Signed-off-by: luohaha <18810541851@163.com> --- fe/fe-core/src/main/java/com/starrocks/alter/RollupJobV2.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fe/fe-core/src/main/java/com/starrocks/alter/RollupJobV2.java b/fe/fe-core/src/main/java/com/starrocks/alter/RollupJobV2.java index 54939589534e0..17dda7b2bc508 100644 --- a/fe/fe-core/src/main/java/com/starrocks/alter/RollupJobV2.java +++ b/fe/fe-core/src/main/java/com/starrocks/alter/RollupJobV2.java @@ -770,7 +770,7 @@ public final boolean cancel(String errMsg) { createReplicaLatch.countDownToZero(new Status(TStatusCode.OK, "")); } synchronized (this) { - return cancelInternal(errMsg) + return cancelInternal(errMsg); } } finally { isCancelling.set(false);