diff --git a/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/controller.yml b/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/controller.yml index c5c9809d2e56..45ff802de304 100644 --- a/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/controller.yml +++ b/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/controller.yml @@ -204,18 +204,6 @@ rules: cache: true labels: version: "$2" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" - name: "pinot_controller_tableConsumptionPaused_$3" - cache: true - labels: - tableName: "$1" - tableType: "$2" -- pattern: "\"org.apache.pinot.common.metrics\"<>(\\w+)" - name: "pinot_controller_tableDisabled_$3" - cache: true - labels: - tableName: "$1" - tableType: "$2" ## Metrics that fit the catch-all patterns above should not be added to this file. ## In case a metric does not fit the catch-all patterns, add them before this comment diff --git a/pinot-common/src/main/java/org/apache/pinot/common/metrics/ControllerGauge.java b/pinot-common/src/main/java/org/apache/pinot/common/metrics/ControllerGauge.java index 3444ffae5f58..ca8c141447fd 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/metrics/ControllerGauge.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/metrics/ControllerGauge.java @@ -153,7 +153,9 @@ public enum ControllerGauge implements AbstractMetrics.Gauge { TABLE_CONSUMPTION_PAUSED("tableConsumptionPaused", false), - TABLE_DISABLED("tableDisabled", false); + TABLE_DISABLED("tableDisabled", false), + + TABLE_REBALANCE_IN_PROGRESS("tableRebalanceInProgress", false); private final String _gaugeName; private final String _unit; diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/SegmentStatusChecker.java b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/SegmentStatusChecker.java index 617564757e0b..d0af31044f1a 100644 --- a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/SegmentStatusChecker.java +++ b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/SegmentStatusChecker.java @@ -363,6 +363,7 @@ private void removeMetricsForTable(String tableNameWithType) { _controllerMetrics.removeTableGauge(tableNameWithType, ControllerGauge.PERCENT_SEGMENTS_AVAILABLE); _controllerMetrics.removeTableGauge(tableNameWithType, ControllerGauge.TABLE_DISABLED); _controllerMetrics.removeTableGauge(tableNameWithType, ControllerGauge.TABLE_CONSUMPTION_PAUSED); + _controllerMetrics.removeTableGauge(tableNameWithType, ControllerGauge.TABLE_REBALANCE_IN_PROGRESS); } private void setStatusToDefault() { diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/rebalance/ZkBasedTableRebalanceObserver.java b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/rebalance/ZkBasedTableRebalanceObserver.java index 7b57147ec085..f02a62cdee0d 100644 --- a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/rebalance/ZkBasedTableRebalanceObserver.java +++ b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/rebalance/ZkBasedTableRebalanceObserver.java @@ -24,6 +24,8 @@ import java.util.HashMap; import java.util.Map; import org.apache.pinot.common.metadata.controllerjob.ControllerJobType; +import org.apache.pinot.common.metrics.ControllerGauge; +import org.apache.pinot.common.metrics.ControllerMetrics; import org.apache.pinot.controller.helix.core.PinotHelixResourceManager; import org.apache.pinot.spi.utils.CommonConstants; import org.apache.pinot.spi.utils.JsonUtils; @@ -48,6 +50,8 @@ public class ZkBasedTableRebalanceObserver implements TableRebalanceObserver { private boolean _isStopped = false; private RebalanceResult.Status _stopStatus; + private final ControllerMetrics _controllerMetrics; + public ZkBasedTableRebalanceObserver(String tableNameWithType, String rebalanceJobId, TableRebalanceContext tableRebalanceContext, PinotHelixResourceManager pinotHelixResourceManager) { Preconditions.checkState(tableNameWithType != null, "Table name cannot be null"); @@ -59,12 +63,14 @@ public ZkBasedTableRebalanceObserver(String tableNameWithType, String rebalanceJ _tableRebalanceProgressStats = new TableRebalanceProgressStats(); _tableRebalanceContext = tableRebalanceContext; _numUpdatesToZk = 0; + _controllerMetrics = ControllerMetrics.get(); } @Override public void onTrigger(Trigger trigger, Map> currentState, Map> targetState) { boolean updatedStatsInZk = false; + _controllerMetrics.setValueOfTableGauge(_tableNameWithType, ControllerGauge.TABLE_REBALANCE_IN_PROGRESS, 1); switch (trigger) { case START_TRIGGER: updateOnStart(currentState, targetState); @@ -119,6 +125,7 @@ private void updateOnStart(Map> currentState, public void onSuccess(String msg) { Preconditions.checkState(RebalanceResult.Status.DONE != _tableRebalanceProgressStats.getStatus(), "Table Rebalance already completed"); + _controllerMetrics.setValueOfTableGauge(_tableNameWithType, ControllerGauge.TABLE_REBALANCE_IN_PROGRESS, 0); long timeToFinishInSeconds = (System.currentTimeMillis() - _tableRebalanceProgressStats.getStartTimeMs()) / 1000L; _tableRebalanceProgressStats.setCompletionStatusMsg(msg); _tableRebalanceProgressStats.setTimeToFinishInSeconds(timeToFinishInSeconds); @@ -132,6 +139,7 @@ public void onSuccess(String msg) { @Override public void onError(String errorMsg) { + _controllerMetrics.setValueOfTableGauge(_tableNameWithType, ControllerGauge.TABLE_REBALANCE_IN_PROGRESS, 0); long timeToFinishInSeconds = (System.currentTimeMillis() - _tableRebalanceProgressStats.getStartTimeMs()) / 1000; _tableRebalanceProgressStats.setTimeToFinishInSeconds(timeToFinishInSeconds); _tableRebalanceProgressStats.setStatus(RebalanceResult.Status.FAILED); diff --git a/pinot-controller/src/test/java/org/apache/pinot/controller/helix/core/rebalance/TestZkBasedTableRebalanceObserver.java b/pinot-controller/src/test/java/org/apache/pinot/controller/helix/core/rebalance/TestZkBasedTableRebalanceObserver.java index 143caebf2bb6..245aa73aeea1 100644 --- a/pinot-controller/src/test/java/org/apache/pinot/controller/helix/core/rebalance/TestZkBasedTableRebalanceObserver.java +++ b/pinot-controller/src/test/java/org/apache/pinot/controller/helix/core/rebalance/TestZkBasedTableRebalanceObserver.java @@ -21,8 +21,10 @@ import java.util.Arrays; import java.util.Map; import java.util.TreeMap; +import org.apache.pinot.common.metrics.ControllerMetrics; import org.apache.pinot.controller.helix.core.PinotHelixResourceManager; import org.apache.pinot.controller.helix.core.assignment.segment.SegmentAssignmentUtils; +import org.mockito.Mockito; import org.testng.annotations.Test; import static org.apache.pinot.spi.utils.CommonConstants.Helix.StateModel.SegmentStateModel.ERROR; @@ -41,6 +43,7 @@ void testZkObserverTracking() { PinotHelixResourceManager pinotHelixResourceManager = mock(PinotHelixResourceManager.class); // Mocking this. We will verify using numZkUpdate stat when(pinotHelixResourceManager.addControllerJobToZK(any(), any(), any())).thenReturn(true); + ControllerMetrics controllerMetrics = Mockito.mock(ControllerMetrics.class); TableRebalanceContext retryCtx = new TableRebalanceContext(); retryCtx.setConfig(new RebalanceConfig()); ZkBasedTableRebalanceObserver observer =