diff --git a/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto b/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto index ee2df89e81b..b487c7c7cec 100644 --- a/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto +++ b/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto @@ -617,30 +617,31 @@ message ContainerBalancerStatusInfoRequestProto { message ContainerBalancerStatusInfoResponseProto { optional bool isRunning = 1; - optional ContainerBalancerStatusInfo containerBalancerStatusInfo = 2; + optional ContainerBalancerStatusInfoProto containerBalancerStatusInfo = 2; } -message ContainerBalancerStatusInfo { +message ContainerBalancerStatusInfoProto { optional uint64 startedAt = 1; optional ContainerBalancerConfigurationProto configuration = 2; - repeated ContainerBalancerTaskIterationStatusInfo iterationsStatusInfo = 3; + repeated ContainerBalancerTaskIterationStatusInfoProto iterationsStatusInfo = 3; } -message ContainerBalancerTaskIterationStatusInfo { +message ContainerBalancerTaskIterationStatusInfoProto { optional int32 iterationNumber = 1; optional string iterationResult = 2; - optional int64 sizeScheduledForMoveGB = 3; - optional int64 dataSizeMovedGB = 4; + optional int64 sizeScheduledForMove = 3; + optional int64 dataSizeMoved = 4; optional int64 containerMovesScheduled = 5; optional int64 containerMovesCompleted = 6; optional int64 containerMovesFailed = 7; optional int64 containerMovesTimeout = 8; - repeated NodeTransferInfo sizeEnteringNodesGB = 9; - repeated NodeTransferInfo sizeLeavingNodesGB = 10; + repeated NodeTransferInfoProto sizeEnteringNodes = 9; + repeated NodeTransferInfoProto sizeLeavingNodes = 10; + optional int64 iterationDuration = 11; } -message NodeTransferInfo { +message NodeTransferInfoProto { optional string uuid = 1; - optional int64 dataVolumeGB = 2; + optional int64 dataVolume = 2; } message DecommissionScmRequestProto { diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/AbstractFindTargetGreedy.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/AbstractFindTargetGreedy.java index 88657047a07..df45ffd9b62 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/AbstractFindTargetGreedy.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/AbstractFindTargetGreedy.java @@ -283,4 +283,9 @@ NodeManager getNodeManager() { public Map getSizeEnteringNodes() { return sizeEnteringNode; } + + @Override + public void clearSizeEnteringNodes() { + sizeEnteringNode.clear(); + } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerMetrics.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerMetrics.java index 6446089db35..3e164cb0bba 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerMetrics.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerMetrics.java @@ -40,6 +40,9 @@ public final class ContainerBalancerMetrics { " in the latest iteration.") private MutableCounterLong dataSizeMovedGBInLatestIteration; + @Metric(about = "Amount of bytes that the Container Balancer moved in the latest iteration.") + private MutableCounterLong dataSizeMovedBytesInLatestIteration; + @Metric(about = "Number of completed container moves performed by " + "Container Balancer in the latest iteration.") private MutableCounterLong numContainerMovesCompletedInLatestIteration; @@ -131,14 +134,16 @@ void incrementNumContainerMovesScheduledInLatestIteration(long valueToAdd) { this.numContainerMovesScheduledInLatestIteration.incr(valueToAdd); } + /** + * Reset the number of containers scheduled to move in the last iteration. + */ public void resetNumContainerMovesScheduledInLatestIteration() { numContainerMovesScheduledInLatestIteration.incr( -getNumContainerMovesScheduledInLatestIteration()); } /** - * Gets the amount of data moved by Container Balancer in the latest - * iteration. + * Retrieves the amount of data moved by the Container Balancer in the latest iteration. * @return size in GB */ public long getDataSizeMovedGBInLatestIteration() { @@ -154,6 +159,29 @@ public void resetDataSizeMovedGBInLatestIteration() { -getDataSizeMovedGBInLatestIteration()); } + /** + * Retrieves the amount of data moved by the Container Balancer in the latest iteration. + * @return size in bytes + */ + public long getDataSizeMovedInLatestIteration() { + return dataSizeMovedBytesInLatestIteration.value(); + } + + /** + * Increment the amount of data moved in the last iteration. + * @param bytes bytes to add + */ + public void incrementDataSizeMovedInLatestIteration(long bytes) { + this.dataSizeMovedBytesInLatestIteration.incr(bytes); + } + + /** + * Reset the amount of data moved in the last iteration. + */ + public void resetDataSizeMovedInLatestIteration() { + dataSizeMovedBytesInLatestIteration.incr(-getDataSizeMovedInLatestIteration()); + } + /** * Gets the number of container moves performed by Container Balancer in the * latest iteration. @@ -163,11 +191,6 @@ public long getNumContainerMovesCompletedInLatestIteration() { return numContainerMovesCompletedInLatestIteration.value(); } - public void incrementNumContainerMovesCompletedInLatestIteration( - long valueToAdd) { - this.numContainerMovesCompletedInLatestIteration.incr(valueToAdd); - } - public void incrementCurrentIterationContainerMoveMetric( MoveManager.MoveResult result, long valueToAdd) { if (result == null) { @@ -204,9 +227,11 @@ public void incrementCurrentIterationContainerMoveMetric( } } + /** + * Reset the number of containers moved in the last iteration. + */ public void resetNumContainerMovesCompletedInLatestIteration() { - numContainerMovesCompletedInLatestIteration.incr( - -getNumContainerMovesCompletedInLatestIteration()); + numContainerMovesCompletedInLatestIteration.incr(-getNumContainerMovesCompletedInLatestIteration()); } /** @@ -218,14 +243,19 @@ public long getNumContainerMovesTimeoutInLatestIteration() { return numContainerMovesTimeoutInLatestIteration.value(); } + /** + * Increases the number of timeout container moves in the latest iteration. + */ public void incrementNumContainerMovesTimeoutInLatestIteration( long valueToAdd) { this.numContainerMovesTimeoutInLatestIteration.incr(valueToAdd); } + /** + * Reset the number of timeout container moves in the latest iteration. + */ public void resetNumContainerMovesTimeoutInLatestIteration() { - numContainerMovesTimeoutInLatestIteration.incr( - -getNumContainerMovesTimeoutInLatestIteration()); + numContainerMovesTimeoutInLatestIteration.incr(-getNumContainerMovesTimeoutInLatestIteration()); } /** diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerStatusInfo.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerStatusInfo.java index cbe8385e53a..a0552142b3b 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerStatusInfo.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerStatusInfo.java @@ -19,9 +19,11 @@ package org.apache.hadoop.hdds.scm.container.balancer; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos; import java.time.OffsetDateTime; import java.util.List; +import java.util.stream.Collectors; /** * Info about balancer status. @@ -51,4 +53,21 @@ public HddsProtos.ContainerBalancerConfigurationProto getConfiguration() { public List getIterationsStatusInfo() { return iterationsStatusInfo; } + + /** + * Converts an instance into a protobuf-compatible object. + * @return proto representation + */ + public StorageContainerLocationProtocolProtos.ContainerBalancerStatusInfoProto toProto() { + return StorageContainerLocationProtocolProtos.ContainerBalancerStatusInfoProto + .newBuilder() + .setStartedAt(getStartedAt().toEpochSecond()) + .setConfiguration(getConfiguration()) + .addAllIterationsStatusInfo( + getIterationsStatusInfo() + .stream() + .map(ContainerBalancerTaskIterationStatusInfo::toProto) + .collect(Collectors.toList()) + ).build(); + } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerTask.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerTask.java index 2c113e8e6ac..f1eee8c6755 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerTask.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerTask.java @@ -37,12 +37,12 @@ import org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException; import org.apache.hadoop.hdds.scm.server.StorageContainerManager; import org.apache.hadoop.ozone.OzoneConsts; -import org.apache.hadoop.util.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; import java.time.Duration; +import java.time.OffsetDateTime; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; @@ -51,18 +51,24 @@ import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Queue; import java.util.Set; +import java.util.UUID; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentLinkedQueue; import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.stream.Collectors; +import static java.time.OffsetDateTime.now; +import static java.util.Collections.emptyMap; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_NODE_REPORT_INTERVAL; import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_NODE_REPORT_INTERVAL_DEFAULT; +import static org.apache.hadoop.util.StringUtils.byteDesc; /** * Container balancer task performs move of containers between over- and @@ -72,6 +78,7 @@ public class ContainerBalancerTask implements Runnable { public static final Logger LOG = LoggerFactory.getLogger(ContainerBalancerTask.class); + public static final long ABSENCE_OF_DURATION = -1L; private NodeManager nodeManager; private ContainerManager containerManager; @@ -101,7 +108,6 @@ public class ContainerBalancerTask implements Runnable { private double lowerLimit; private ContainerBalancerSelectionCriteria selectionCriteria; private volatile Status taskStatus = Status.RUNNING; - /* Since a container can be selected only once during an iteration, these maps use it as a primary key to track source to target pairings. @@ -119,6 +125,8 @@ public class ContainerBalancerTask implements Runnable { private int nextIterationIndex; private boolean delayStart; private Queue iterationsStatistic; + private OffsetDateTime currentIterationStarted; + private AtomicBoolean isCurrentIterationInProgress = new AtomicBoolean(false); /** * Constructs ContainerBalancerTask with the specified arguments. @@ -216,6 +224,10 @@ private void balance() { // leader change or restart int i = nextIterationIndex; for (; i < iterations && isBalancerRunning(); i++) { + currentIterationStarted = now(); + + isCurrentIterationInProgress.compareAndSet(false, true); + // reset some variables and metrics for this iteration resetState(); if (config.getTriggerDuEnable()) { @@ -262,21 +274,29 @@ private void balance() { return; } - IterationResult iR = doIteration(); - saveIterationStatistic(i, iR); + IterationResult currentIterationResult = doIteration(); + ContainerBalancerTaskIterationStatusInfo iterationStatistic = + getIterationStatistic(i + 1, currentIterationResult, getCurrentIterationDuration()); + iterationsStatistic.offer(iterationStatistic); + + isCurrentIterationInProgress.compareAndSet(true, false); + + findTargetStrategy.clearSizeEnteringNodes(); + findSourceStrategy.clearSizeLeavingNodes(); + metrics.incrementNumIterations(1); - LOG.info("Result of this iteration of Container Balancer: {}", iR); + LOG.info("Result of this iteration of Container Balancer: {}", currentIterationResult); // if no new move option is generated, it means the cluster cannot be // balanced anymore; so just stop balancer - if (iR == IterationResult.CAN_NOT_BALANCE_ANY_MORE) { - tryStopWithSaveConfiguration(iR.toString()); + if (currentIterationResult == IterationResult.CAN_NOT_BALANCE_ANY_MORE) { + tryStopWithSaveConfiguration(currentIterationResult.toString()); return; } // persist next iteration index - if (iR == IterationResult.ITERATION_COMPLETED) { + if (currentIterationResult == IterationResult.ITERATION_COMPLETED) { try { saveConfiguration(config, true, i + 1); } catch (IOException | TimeoutException e) { @@ -307,80 +327,143 @@ private void balance() { tryStopWithSaveConfiguration("Completed all iterations."); } - private void saveIterationStatistic(Integer iterationNumber, IterationResult iR) { - ContainerBalancerTaskIterationStatusInfo iterationStatistic = new ContainerBalancerTaskIterationStatusInfo( + private ContainerBalancerTaskIterationStatusInfo getIterationStatistic(Integer iterationNumber, + IterationResult currentIterationResult, + long iterationDuration) { + String currentIterationResultName = currentIterationResult == null ? null : currentIterationResult.name(); + Map sizeEnteringDataToNodes = + convertToNodeIdToTrafficMap(findTargetStrategy.getSizeEnteringNodes()); + Map sizeLeavingDataFromNodes = + convertToNodeIdToTrafficMap(findSourceStrategy.getSizeLeavingNodes()); + IterationInfo iterationInfo = new IterationInfo( iterationNumber, - iR.name(), - getSizeScheduledForMoveInLatestIteration() / OzoneConsts.GB, - metrics.getDataSizeMovedGBInLatestIteration(), - metrics.getNumContainerMovesScheduledInLatestIteration(), - metrics.getNumContainerMovesCompletedInLatestIteration(), - metrics.getNumContainerMovesFailedInLatestIteration(), - metrics.getNumContainerMovesTimeoutInLatestIteration(), - findTargetStrategy.getSizeEnteringNodes() - .entrySet() - .stream() - .filter(datanodeDetailsLongEntry -> datanodeDetailsLongEntry.getValue() > 0) - .collect( - Collectors.toMap( - entry -> entry.getKey().getUuid(), - entry -> entry.getValue() / OzoneConsts.GB - ) - ), - findSourceStrategy.getSizeLeavingNodes() - .entrySet() - .stream() - .filter(datanodeDetailsLongEntry -> datanodeDetailsLongEntry.getValue() > 0) - .collect( - Collectors.toMap( - entry -> entry.getKey().getUuid(), - entry -> entry.getValue() / OzoneConsts.GB - ) - ) + currentIterationResultName, + iterationDuration ); - iterationsStatistic.offer(iterationStatistic); + ContainerMoveInfo containerMoveInfo = new ContainerMoveInfo(metrics); + + DataMoveInfo dataMoveInfo = + getDataMoveInfo(currentIterationResultName, sizeEnteringDataToNodes, sizeLeavingDataFromNodes); + return new ContainerBalancerTaskIterationStatusInfo(iterationInfo, containerMoveInfo, dataMoveInfo); } + private DataMoveInfo getDataMoveInfo(String currentIterationResultName, Map sizeEnteringDataToNodes, + Map sizeLeavingDataFromNodes) { + if (currentIterationResultName == null) { + // For unfinished iteration + return new DataMoveInfo( + getSizeScheduledForMoveInLatestIteration(), + sizeActuallyMovedInLatestIteration, + sizeEnteringDataToNodes, + sizeLeavingDataFromNodes + ); + } else { + // For finished iteration + return new DataMoveInfo( + getSizeScheduledForMoveInLatestIteration(), + metrics.getDataSizeMovedInLatestIteration(), + sizeEnteringDataToNodes, + sizeLeavingDataFromNodes + ); + } + } + + private Map convertToNodeIdToTrafficMap(Map nodeTrafficMap) { + return nodeTrafficMap + .entrySet() + .stream() + .filter(Objects::nonNull) + .filter(datanodeDetailsLongEntry -> datanodeDetailsLongEntry.getValue() > 0) + .collect( + Collectors.toMap( + entry -> entry.getKey().getUuid(), + Map.Entry::getValue + ) + ); + } + + /** + * Get current iteration statistics. + * @return current iteration statistic + */ public List getCurrentIterationsStatistic() { List resultList = new ArrayList<>(iterationsStatistic); + ContainerBalancerTaskIterationStatusInfo currentIterationStatistic = createCurrentIterationStatistic(); + if (currentIterationStatistic != null) { + resultList.add(currentIterationStatistic); + } + return resultList; + } + + private ContainerBalancerTaskIterationStatusInfo createCurrentIterationStatistic() { + List resultList = new ArrayList<>(iterationsStatistic); int lastIterationNumber = resultList.stream() .mapToInt(ContainerBalancerTaskIterationStatusInfo::getIterationNumber) .max() .orElse(0); + long iterationDuration = getCurrentIterationDuration(); + + if (isCurrentIterationInProgress.get()) { + return getIterationStatistic(lastIterationNumber + 1, null, iterationDuration); + } else { + return null; + } + } - ContainerBalancerTaskIterationStatusInfo currentIterationStatistic = new ContainerBalancerTaskIterationStatusInfo( + private static ContainerBalancerTaskIterationStatusInfo getEmptyCurrentIterationStatistic( + long iterationDuration) { + ContainerMoveInfo containerMoveInfo = new ContainerMoveInfo(0, 0, 0, 0); + DataMoveInfo dataMoveInfo = new DataMoveInfo( + 0, + 0, + emptyMap(), + emptyMap() + ); + IterationInfo iterationInfo = new IterationInfo( + 0, + null, + iterationDuration + ); + return new ContainerBalancerTaskIterationStatusInfo( + iterationInfo, + containerMoveInfo, + dataMoveInfo + ); + } + + private ContainerBalancerTaskIterationStatusInfo getFilledCurrentIterationStatistic(int lastIterationNumber, + long iterationDuration) { + Map sizeEnteringDataToNodes = + convertToNodeIdToTrafficMap(findTargetStrategy.getSizeEnteringNodes()); + Map sizeLeavingDataFromNodes = + convertToNodeIdToTrafficMap(findSourceStrategy.getSizeLeavingNodes()); + + ContainerMoveInfo containerMoveInfo = new ContainerMoveInfo(metrics); + DataMoveInfo dataMoveInfo = new DataMoveInfo( + getSizeScheduledForMoveInLatestIteration(), + sizeActuallyMovedInLatestIteration, + sizeEnteringDataToNodes, + sizeLeavingDataFromNodes + ); + IterationInfo iterationInfo = new IterationInfo( lastIterationNumber + 1, null, - getSizeScheduledForMoveInLatestIteration() / OzoneConsts.GB, - sizeActuallyMovedInLatestIteration / OzoneConsts.GB, - metrics.getNumContainerMovesScheduledInLatestIteration(), - metrics.getNumContainerMovesCompletedInLatestIteration(), - metrics.getNumContainerMovesFailedInLatestIteration(), - metrics.getNumContainerMovesTimeoutInLatestIteration(), - findTargetStrategy.getSizeEnteringNodes() - .entrySet() - .stream() - .filter(datanodeDetailsLongEntry -> datanodeDetailsLongEntry.getValue() > 0) - .collect( - Collectors.toMap( - entry -> entry.getKey().getUuid(), - entry -> entry.getValue() / OzoneConsts.GB - ) - ), - findSourceStrategy.getSizeLeavingNodes() - .entrySet() - .stream() - .filter(datanodeDetailsLongEntry -> datanodeDetailsLongEntry.getValue() > 0) - .collect( - Collectors.toMap( - entry -> entry.getKey().getUuid(), - entry -> entry.getValue() / OzoneConsts.GB - ) - ) + iterationDuration ); - resultList.add(currentIterationStatistic); - return resultList; + return new ContainerBalancerTaskIterationStatusInfo( + iterationInfo, + containerMoveInfo, + dataMoveInfo + ); + } + + private long getCurrentIterationDuration() { + if (currentIterationStarted == null) { + return ABSENCE_OF_DURATION; + } else { + return now().toEpochSecond() - currentIterationStarted.toEpochSecond(); + } } /** @@ -706,26 +789,28 @@ private void checkIterationMoveResults() { } } - countDatanodesInvolvedPerIteration = - selectedSources.size() + selectedTargets.size(); - metrics.incrementNumDatanodesInvolvedInLatestIteration( - countDatanodesInvolvedPerIteration); - metrics.incrementNumContainerMovesScheduled( - metrics.getNumContainerMovesScheduledInLatestIteration()); - metrics.incrementNumContainerMovesCompleted( - metrics.getNumContainerMovesCompletedInLatestIteration()); - metrics.incrementNumContainerMovesTimeout( - metrics.getNumContainerMovesTimeoutInLatestIteration()); - metrics.incrementDataSizeMovedGBInLatestIteration( - sizeActuallyMovedInLatestIteration / OzoneConsts.GB); - metrics.incrementDataSizeMovedGB( - metrics.getDataSizeMovedGBInLatestIteration()); - metrics.incrementNumContainerMovesFailed( - metrics.getNumContainerMovesFailedInLatestIteration()); + countDatanodesInvolvedPerIteration = selectedSources.size() + selectedTargets.size(); + + metrics.incrementNumDatanodesInvolvedInLatestIteration(countDatanodesInvolvedPerIteration); + + metrics.incrementNumContainerMovesScheduled(metrics.getNumContainerMovesScheduledInLatestIteration()); + + metrics.incrementNumContainerMovesCompleted(metrics.getNumContainerMovesCompletedInLatestIteration()); + + metrics.incrementNumContainerMovesTimeout(metrics.getNumContainerMovesTimeoutInLatestIteration()); + + metrics.incrementDataSizeMovedGBInLatestIteration(sizeActuallyMovedInLatestIteration / OzoneConsts.GB); + + metrics.incrementDataSizeMovedInLatestIteration(sizeActuallyMovedInLatestIteration); + + metrics.incrementDataSizeMovedGB(metrics.getDataSizeMovedGBInLatestIteration()); + + metrics.incrementNumContainerMovesFailed(metrics.getNumContainerMovesFailedInLatestIteration()); + LOG.info("Iteration Summary. Number of Datanodes involved: {}. Size " + "moved: {} ({} Bytes). Number of Container moves completed: {}.", countDatanodesInvolvedPerIteration, - StringUtils.byteDesc(sizeActuallyMovedInLatestIteration), + byteDesc(sizeActuallyMovedInLatestIteration), sizeActuallyMovedInLatestIteration, metrics.getNumContainerMovesCompletedInLatestIteration()); } @@ -1144,6 +1229,7 @@ private void resetState() { this.sizeScheduledForMoveInLatestIteration = 0; this.sizeActuallyMovedInLatestIteration = 0; metrics.resetDataSizeMovedGBInLatestIteration(); + metrics.resetDataSizeMovedInLatestIteration(); metrics.resetNumContainerMovesScheduledInLatestIteration(); metrics.resetNumContainerMovesCompletedInLatestIteration(); metrics.resetNumContainerMovesTimeoutInLatestIteration(); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerTaskIterationStatusInfo.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerTaskIterationStatusInfo.java index 1d597b0ca27..a466d9fd474 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerTaskIterationStatusInfo.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerBalancerTaskIterationStatusInfo.java @@ -18,86 +18,160 @@ package org.apache.hadoop.hdds.scm.container.balancer; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos; + +import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.UUID; +import java.util.stream.Collectors; /** * Information about balancer task iteration. */ public class ContainerBalancerTaskIterationStatusInfo { - private final Integer iterationNumber; - private final String iterationResult; - private final long sizeScheduledForMoveGB; - private final long dataSizeMovedGB; - private final long containerMovesScheduled; - private final long containerMovesCompleted; - private final long containerMovesFailed; - private final long containerMovesTimeout; - private final Map sizeEnteringNodesGB; - private final Map sizeLeavingNodesGB; - - @SuppressWarnings("checkstyle:ParameterNumber") + + private final IterationInfo iterationInfo; + private final ContainerMoveInfo containerMoveInfo; + private final DataMoveInfo dataMoveInfo; + public ContainerBalancerTaskIterationStatusInfo( - Integer iterationNumber, - String iterationResult, - long sizeScheduledForMoveGB, - long dataSizeMovedGB, - long containerMovesScheduled, - long containerMovesCompleted, - long containerMovesFailed, - long containerMovesTimeout, - Map sizeEnteringNodesGB, - Map sizeLeavingNodesGB) { - this.iterationNumber = iterationNumber; - this.iterationResult = iterationResult; - this.sizeScheduledForMoveGB = sizeScheduledForMoveGB; - this.dataSizeMovedGB = dataSizeMovedGB; - this.containerMovesScheduled = containerMovesScheduled; - this.containerMovesCompleted = containerMovesCompleted; - this.containerMovesFailed = containerMovesFailed; - this.containerMovesTimeout = containerMovesTimeout; - this.sizeEnteringNodesGB = sizeEnteringNodesGB; - this.sizeLeavingNodesGB = sizeLeavingNodesGB; + IterationInfo iterationInfo, + ContainerMoveInfo containerMoveInfo, + DataMoveInfo dataMoveInfo) { + this.iterationInfo = iterationInfo; + this.containerMoveInfo = containerMoveInfo; + this.dataMoveInfo = dataMoveInfo; } + /** + * Get the number of iterations. + * @return iteration number + */ public Integer getIterationNumber() { - return iterationNumber; + return iterationInfo.getIterationNumber(); } + /** + * Get the iteration result. + * @return iteration result + */ public String getIterationResult() { - return iterationResult; + return iterationInfo.getIterationResult(); } - public long getSizeScheduledForMoveGB() { - return sizeScheduledForMoveGB; + /** + * Get the size of the bytes that are scheduled to move in the iteration. + * @return size in bytes + */ + public long getSizeScheduledForMove() { + return dataMoveInfo.getSizeScheduledForMove(); } - public long getDataSizeMovedGB() { - return dataSizeMovedGB; + /** + * Get the size of the bytes that were moved in the iteration. + * @return size in bytes + */ + public long getDataSizeMoved() { + return dataMoveInfo.getDataSizeMoved(); } + /** + * Get the number of containers scheduled to move. + * @return number of containers scheduled to move + */ public long getContainerMovesScheduled() { - return containerMovesScheduled; + return containerMoveInfo.getContainerMovesScheduled(); } + /** + * Get the number of successfully moved containers. + * @return number of successfully moved containers + */ public long getContainerMovesCompleted() { - return containerMovesCompleted; + return containerMoveInfo.getContainerMovesCompleted(); } + /** + * Get the number of containers that were not moved successfully. + * @return number of unsuccessfully moved containers + */ public long getContainerMovesFailed() { - return containerMovesFailed; + return containerMoveInfo.getContainerMovesFailed(); } + /** + * Get the number of containers moved with a timeout. + * @return number of moved with timeout containers + */ public long getContainerMovesTimeout() { - return containerMovesTimeout; + return containerMoveInfo.getContainerMovesTimeout(); + } + + /** + * Get a map of the node IDs and the corresponding data sizes moved to each node. + * @return nodeId to size entering from node map + */ + public Map getSizeEnteringNodes() { + return dataMoveInfo.getSizeEnteringNodes(); + } + + /** + * Get a map of the node IDs and the corresponding data sizes moved from each node. + * @return nodeId to size leaving from node map + */ + public Map getSizeLeavingNodes() { + return dataMoveInfo.getSizeLeavingNodes(); + } + + /** + * Get the iteration duration. + * @return iteration duration + */ + public Long getIterationDuration() { + return iterationInfo.getIterationDuration(); } - public Map getSizeEnteringNodesGB() { - return sizeEnteringNodesGB; + /** + * Converts an instance into the protobuf compatible object. + * @return proto representation + */ + public StorageContainerLocationProtocolProtos.ContainerBalancerTaskIterationStatusInfoProto toProto() { + return StorageContainerLocationProtocolProtos.ContainerBalancerTaskIterationStatusInfoProto.newBuilder() + .setIterationNumber(getIterationNumber()) + .setIterationResult(Optional.ofNullable(getIterationResult()).orElse("")) + .setIterationDuration(getIterationDuration()) + .setSizeScheduledForMove(getSizeScheduledForMove()) + .setDataSizeMoved(getDataSizeMoved()) + .setContainerMovesScheduled(getContainerMovesScheduled()) + .setContainerMovesCompleted(getContainerMovesCompleted()) + .setContainerMovesFailed(getContainerMovesFailed()) + .setContainerMovesTimeout(getContainerMovesTimeout()) + .addAllSizeEnteringNodes( + mapToProtoNodeTransferInfo(getSizeEnteringNodes()) + ) + .addAllSizeLeavingNodes( + mapToProtoNodeTransferInfo(getSizeLeavingNodes()) + ) + .build(); } - public Map getSizeLeavingNodesGB() { - return sizeLeavingNodesGB; + /** + * Converts an instance into the protobuf compatible object. + * @param nodes node id to node traffic size + * @return node transfer info proto representation + */ + private List mapToProtoNodeTransferInfo( + Map nodes + ) { + return nodes.entrySet() + .stream() + .map(entry -> StorageContainerLocationProtocolProtos.NodeTransferInfoProto.newBuilder() + .setUuid(entry.getKey().toString()) + .setDataVolume(entry.getValue()) + .build() + ) + .collect(Collectors.toList()); } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerMoveInfo.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerMoveInfo.java new file mode 100644 index 00000000000..caed286480b --- /dev/null +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/ContainerMoveInfo.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdds.scm.container.balancer; + +/** + * Information about moving containers. + */ +public class ContainerMoveInfo { + private final long containerMovesScheduled; + private final long containerMovesCompleted; + private final long containerMovesFailed; + private final long containerMovesTimeout; + + public ContainerMoveInfo(long containerMovesScheduled, long containerMovesCompleted, long containerMovesFailed, + long containerMovesTimeout) { + this.containerMovesScheduled = containerMovesScheduled; + this.containerMovesCompleted = containerMovesCompleted; + this.containerMovesFailed = containerMovesFailed; + this.containerMovesTimeout = containerMovesTimeout; + } + + public ContainerMoveInfo(ContainerBalancerMetrics metrics) { + this.containerMovesScheduled = metrics.getNumContainerMovesScheduledInLatestIteration(); + this.containerMovesCompleted = metrics.getNumContainerMovesCompletedInLatestIteration(); + this.containerMovesFailed = metrics.getNumContainerMovesFailedInLatestIteration(); + this.containerMovesTimeout = metrics.getNumContainerMovesTimeoutInLatestIteration(); + } + + public long getContainerMovesScheduled() { + return containerMovesScheduled; + } + + public long getContainerMovesCompleted() { + return containerMovesCompleted; + } + + public long getContainerMovesFailed() { + return containerMovesFailed; + } + + public long getContainerMovesTimeout() { + return containerMovesTimeout; + } +} diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/DataMoveInfo.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/DataMoveInfo.java new file mode 100644 index 00000000000..cd97011768d --- /dev/null +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/DataMoveInfo.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdds.scm.container.balancer; + +import java.util.Map; +import java.util.UUID; + +/** + * Information about the process of moving data. + */ +public class DataMoveInfo { + private final long sizeScheduledForMove; + private final long dataSizeMoved; + private final Map sizeEnteringNodes; + private final Map sizeLeavingNodes; + + + public DataMoveInfo( + long sizeScheduledForMove, + long dataSizeMoved, + Map sizeEnteringNodes, + Map sizeLeavingNodes) { + this.sizeScheduledForMove = sizeScheduledForMove; + this.dataSizeMoved = dataSizeMoved; + this.sizeEnteringNodes = sizeEnteringNodes; + this.sizeLeavingNodes = sizeLeavingNodes; + } + + public long getSizeScheduledForMove() { + return sizeScheduledForMove; + } + + public long getDataSizeMoved() { + return dataSizeMoved; + } + + public Map getSizeEnteringNodes() { + return sizeEnteringNodes; + } + + public Map getSizeLeavingNodes() { + return sizeLeavingNodes; + } +} diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/FindSourceGreedy.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/FindSourceGreedy.java index 57cc8b32b94..9773ae45f50 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/FindSourceGreedy.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/FindSourceGreedy.java @@ -206,4 +206,9 @@ public void reInitialize(List potentialDataNodes, public Map getSizeLeavingNodes() { return sizeLeavingNode; } + + @Override + public void clearSizeLeavingNodes() { + sizeLeavingNode.clear(); + } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/FindSourceStrategy.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/FindSourceStrategy.java index 9e429aaa21d..0043d8509b0 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/FindSourceStrategy.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/FindSourceStrategy.java @@ -87,5 +87,14 @@ void reInitialize(List potentialDataNodes, */ void resetPotentialSources(@Nonnull Collection sources); + /** + * Get a map of the node IDs and the corresponding data sizes moved from each node. + * @return nodeId to size leaving from node map + */ Map getSizeLeavingNodes(); + + /** + * Clear the map of node IDs and their corresponding data sizes that were moved from each node. + */ + void clearSizeLeavingNodes(); } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/FindTargetStrategy.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/FindTargetStrategy.java index 389ea6e5192..8959fc4ff23 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/FindTargetStrategy.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/FindTargetStrategy.java @@ -70,5 +70,14 @@ void reInitialize(List potentialDataNodes, */ void resetPotentialTargets(@Nonnull Collection targets); + /** + * Get a map of the node IDs and the corresponding data sizes moved to each node. + * @return nodeId to size entering from node map + */ Map getSizeEnteringNodes(); + + /** + * Clear the map of node IDs and their corresponding data sizes that were moved to each node. + */ + void clearSizeEnteringNodes(); } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/IterationInfo.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/IterationInfo.java new file mode 100644 index 00000000000..615848a097a --- /dev/null +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/balancer/IterationInfo.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdds.scm.container.balancer; + +/** + * Information about the iteration. + */ +public class IterationInfo { + + private final Integer iterationNumber; + private final String iterationResult; + private final Long iterationDuration; + + public IterationInfo(Integer iterationNumber, String iterationResult, long iterationDuration) { + this.iterationNumber = iterationNumber; + this.iterationResult = iterationResult; + this.iterationDuration = iterationDuration; + } + + public Integer getIterationNumber() { + return iterationNumber; + } + + public String getIterationResult() { + return iterationResult; + } + + public Long getIterationDuration() { + return iterationDuration; + } +} diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java index 6fdc81ac9a3..77cb1a26349 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java @@ -37,10 +37,8 @@ import org.apache.hadoop.hdds.protocol.proto.ReconfigureProtocolProtos.ReconfigureProtocolService; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ContainerBalancerStatusInfoResponseProto; -import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ContainerBalancerTaskIterationStatusInfo; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.DecommissionScmResponseProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.DecommissionScmResponseProto.Builder; -import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.NodeTransferInfo; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StartContainerBalancerResponseProto; import org.apache.hadoop.hdds.protocolPB.ReconfigureProtocolPB; import org.apache.hadoop.hdds.protocolPB.ReconfigureProtocolServerSideTranslatorPB; @@ -1215,48 +1213,7 @@ public ContainerBalancerStatusInfoResponseProto getContainerBalancerStatusInfo() return ContainerBalancerStatusInfoResponseProto .newBuilder() .setIsRunning(true) - .setContainerBalancerStatusInfo(StorageContainerLocationProtocolProtos.ContainerBalancerStatusInfo - .newBuilder() - .setStartedAt(balancerStatusInfo.getStartedAt().toEpochSecond()) - .setConfiguration(balancerStatusInfo.getConfiguration()) - .addAllIterationsStatusInfo( - balancerStatusInfo.getIterationsStatusInfo() - .stream() - .map( - info -> ContainerBalancerTaskIterationStatusInfo.newBuilder() - .setIterationNumber(info.getIterationNumber()) - .setIterationResult(Optional.ofNullable(info.getIterationResult()).orElse("")) - .setSizeScheduledForMoveGB(info.getSizeScheduledForMoveGB()) - .setDataSizeMovedGB(info.getDataSizeMovedGB()) - .setContainerMovesScheduled(info.getContainerMovesScheduled()) - .setContainerMovesCompleted(info.getContainerMovesCompleted()) - .setContainerMovesFailed(info.getContainerMovesFailed()) - .setContainerMovesTimeout(info.getContainerMovesTimeout()) - .addAllSizeEnteringNodesGB( - info.getSizeEnteringNodesGB().entrySet() - .stream() - .map(entry -> NodeTransferInfo.newBuilder() - .setUuid(entry.getKey().toString()) - .setDataVolumeGB(entry.getValue()) - .build() - ) - .collect(Collectors.toList()) - ) - .addAllSizeLeavingNodesGB( - info.getSizeLeavingNodesGB().entrySet() - .stream() - .map(entry -> NodeTransferInfo.newBuilder() - .setUuid(entry.getKey().toString()) - .setDataVolumeGB(entry.getValue()) - .build() - ) - .collect(Collectors.toList()) - ) - .build() - ) - .collect(Collectors.toList()) - ) - ) + .setContainerBalancerStatusInfo(balancerStatusInfo.toProto()) .build(); } } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/MockedSCM.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/MockedSCM.java index 0972e57df64..62a126da15c 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/MockedSCM.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/MockedSCM.java @@ -137,11 +137,34 @@ public String toString() { return task; } + public @Nonnull ContainerBalancerTask startBalancerTaskAsync( + @Nonnull ContainerBalancer containerBalancer, + @Nonnull ContainerBalancerConfiguration config, + Boolean withDelay) { + ContainerBalancerTask task = new ContainerBalancerTask(scm, 0, containerBalancer, + containerBalancer.getMetrics(), config, withDelay); + new Thread(task).start(); + return task; + } + public @Nonnull ContainerBalancerTask startBalancerTask(@Nonnull ContainerBalancerConfiguration config) { init(config, new OzoneConfiguration()); return startBalancerTask(new ContainerBalancer(scm), config); } + public @Nonnull ContainerBalancerTask startBalancerTaskAsync(@Nonnull ContainerBalancerConfiguration config, + OzoneConfiguration ozoneConfig, + Boolean withDelay) { + init(config, ozoneConfig); + return startBalancerTaskAsync(new ContainerBalancer(scm), config, withDelay); + } + + public @Nonnull ContainerBalancerTask startBalancerTaskAsync(@Nonnull ContainerBalancerConfiguration config, + Boolean withDelay) { + init(config, new OzoneConfiguration()); + return startBalancerTaskAsync(new ContainerBalancer(scm), config, withDelay); + } + public void enableLegacyReplicationManager() { mockedReplicaManager.conf.setEnableLegacy(true); } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/TestContainerBalancerStatusInfo.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/TestContainerBalancerStatusInfo.java index 48b3ee2d0de..fafd43c69ff 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/TestContainerBalancerStatusInfo.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/balancer/TestContainerBalancerStatusInfo.java @@ -18,16 +18,23 @@ package org.apache.hadoop.hdds.scm.container.balancer; +import org.apache.commons.math3.util.ArithmeticUtils; +import org.apache.hadoop.hdds.HddsConfigKeys; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.scm.server.StorageContainerManager; import org.apache.hadoop.ozone.OzoneConsts; +import org.apache.ozone.test.LambdaTestUtils; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; import java.util.List; +import java.util.Map; +import java.util.UUID; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertTrue; /** @@ -47,15 +54,210 @@ void testGetIterationStatistics() { ContainerBalancerTask task = mockedScm.startBalancerTask(config); List iterationStatistics = task.getCurrentIterationsStatistic(); - assertEquals(3, iterationStatistics.size()); - iterationStatistics.forEach(is -> { - assertTrue(is.getContainerMovesCompleted() > 0); - assertEquals(0, is.getContainerMovesFailed()); - assertEquals(0, is.getContainerMovesTimeout()); - assertFalse(is.getSizeEnteringNodesGB().isEmpty()); - assertFalse(is.getSizeLeavingNodesGB().isEmpty()); + assertEquals(2, iterationStatistics.size()); + + ContainerBalancerTaskIterationStatusInfo iterationHistory1 = iterationStatistics.get(0); + verifyCompletedIteration(iterationHistory1, 1); + + ContainerBalancerTaskIterationStatusInfo iterationHistory2 = iterationStatistics.get(1); + verifyCompletedIteration(iterationHistory2, 2); + } + + @Test + void testReRequestIterationStatistics() throws Exception { + MockedSCM mockedScm = new MockedSCM(new TestableCluster(20, OzoneConsts.GB)); + + ContainerBalancerConfiguration config = new OzoneConfiguration().getObject(ContainerBalancerConfiguration.class); + + config.setIterations(2); + config.setBalancingInterval(0); + config.setMaxSizeToMovePerIteration(50 * OzoneConsts.GB); + + ContainerBalancerTask task = mockedScm.startBalancerTask(config); + List firstRequestIterationStatistics = + task.getCurrentIterationsStatistic(); + Thread.sleep(1000L); + List secondRequestIterationStatistics = + task.getCurrentIterationsStatistic(); + assertEquals(firstRequestIterationStatistics.get(0), secondRequestIterationStatistics.get(0)); + assertEquals(firstRequestIterationStatistics.get(1), secondRequestIterationStatistics.get(1)); + } + + @Test + void testGetCurrentStatisticsRequestInPeriodBetweenIterations() throws Exception { + MockedSCM mockedScm = new MockedSCM(new TestableCluster(20, OzoneConsts.GB)); + + ContainerBalancerConfiguration config = new OzoneConfiguration().getObject(ContainerBalancerConfiguration.class); + + config.setIterations(2); + config.setBalancingInterval(10000); + config.setMaxSizeToMovePerIteration(50 * OzoneConsts.GB); + + ContainerBalancerTask task = mockedScm.startBalancerTaskAsync(config, false); + LambdaTestUtils.await(1000, 500, + () -> task.getCurrentIterationsStatistic().size() == 1 && + task.getCurrentIterationsStatistic().get(0).getIterationResult().equals("ITERATION_COMPLETED")); + List iterationsStatic = task.getCurrentIterationsStatistic(); + assertEquals(1, iterationsStatic.size()); + + ContainerBalancerTaskIterationStatusInfo firstIteration = iterationsStatic.get(0); + verifyCompletedIteration(firstIteration, 1); + } + + @Test + void testCurrentStatisticsDoesntChangeWhenReRequestInPeriodBetweenIterations() throws InterruptedException { + MockedSCM mockedScm = new MockedSCM(new TestableCluster(20, OzoneConsts.GB)); + + ContainerBalancerConfiguration config = new OzoneConfiguration().getObject(ContainerBalancerConfiguration.class); + + config.setIterations(2); + config.setBalancingInterval(10000); + config.setMaxSizeToMovePerIteration(50 * OzoneConsts.GB); + + ContainerBalancerTask task = mockedScm.startBalancerTaskAsync(config, false); + // Delay in finishing the first iteration + Thread.sleep(1000L); + List firstRequestIterationStatistics = + task.getCurrentIterationsStatistic(); + // Delay occurred for some time during the period between iterations. + Thread.sleep(1000L); + List secondRequestIterationStatistics = + task.getCurrentIterationsStatistic(); + assertEquals(1, firstRequestIterationStatistics.size()); + assertEquals(1, secondRequestIterationStatistics.size()); + assertEquals(firstRequestIterationStatistics.get(0), secondRequestIterationStatistics.get(0)); + } + + @Test + void testGetCurrentStatisticsWithDelay() throws Exception { + MockedSCM mockedScm = new MockedSCM(new TestableCluster(20, OzoneConsts.GB)); + + ContainerBalancerConfiguration config = new OzoneConfiguration().getObject(ContainerBalancerConfiguration.class); + + config.setIterations(2); + config.setBalancingInterval(0); + config.setMaxSizeToMovePerIteration(50 * OzoneConsts.GB); + OzoneConfiguration configuration = new OzoneConfiguration(); + configuration.set(HddsConfigKeys.HDDS_SCM_WAIT_TIME_AFTER_SAFE_MODE_EXIT, "1"); + ContainerBalancerTask task = mockedScm.startBalancerTaskAsync(config, configuration, true); + // Delay in finishing the first iteration + LambdaTestUtils.await(1100, 50, () -> task.getCurrentIterationsStatistic().size() == 1); + List iterationsStatic = task.getCurrentIterationsStatistic(); + assertEquals(1, iterationsStatic.size()); + ContainerBalancerTaskIterationStatusInfo currentIteration = iterationsStatic.get(0); + verifyStartedEmptyIteration(currentIteration); + } + + @Test + void testGetCurrentStatisticsWhileBalancingInProgress() throws Exception { + MockedSCM mockedScm = new MockedSCM(new TestableCluster(20, OzoneConsts.GB)); + + ContainerBalancerConfiguration config = new OzoneConfiguration().getObject(ContainerBalancerConfiguration.class); + + config.setIterations(3); + config.setBalancingInterval(0); + config.setMaxSizeToMovePerIteration(50 * OzoneConsts.GB); + + ContainerBalancerTask task = mockedScm.startBalancerTaskAsync(config, false); + // Get the current iteration statistics when it has information about the containers moving. + LambdaTestUtils.await(1000, 10, + () -> task.getCurrentIterationsStatistic().size() == 2 && + task.getCurrentIterationsStatistic().get(1).getContainerMovesScheduled() > 0); + List iterationsStatic = task.getCurrentIterationsStatistic(); + assertEquals(2, iterationsStatic.size()); + ContainerBalancerTaskIterationStatusInfo currentIteration = iterationsStatic.get(1); + assertCurrentIterationStatisticWhileBalancingInProgress(currentIteration); + } + + + private static void verifyUnstartedIteration(ContainerBalancerTaskIterationStatusInfo iterationsStatic) { + assertEquals(0, iterationsStatic.getIterationNumber()); + assertEquals(-1, iterationsStatic.getIterationDuration()); + assertNull(iterationsStatic.getIterationResult()); + assertEquals(0, iterationsStatic.getContainerMovesScheduled()); + assertEquals(0, iterationsStatic.getContainerMovesCompleted()); + assertEquals(0, iterationsStatic.getContainerMovesFailed()); + assertEquals(0, iterationsStatic.getContainerMovesTimeout()); + assertEquals(0, iterationsStatic.getSizeScheduledForMove()); + assertEquals(0, iterationsStatic.getDataSizeMoved()); + assertTrue(iterationsStatic.getSizeEnteringNodes().isEmpty()); + assertTrue(iterationsStatic.getSizeLeavingNodes().isEmpty()); + } + + private static void assertCurrentIterationStatisticWhileBalancingInProgress( + ContainerBalancerTaskIterationStatusInfo iterationsStatic + ) { + + assertEquals(2, iterationsStatic.getIterationNumber()); + assertEquals(0, iterationsStatic.getIterationDuration()); + assertNull(iterationsStatic.getIterationResult()); + assertTrue(iterationsStatic.getContainerMovesScheduled() > 0); + assertTrue(iterationsStatic.getContainerMovesCompleted() > 0); + assertEquals(0, iterationsStatic.getContainerMovesFailed()); + assertEquals(0, iterationsStatic.getContainerMovesTimeout()); + assertTrue(iterationsStatic.getSizeScheduledForMove() > 0); + assertTrue(iterationsStatic.getDataSizeMoved() > 0); + assertFalse(iterationsStatic.getSizeEnteringNodes().isEmpty()); + assertFalse(iterationsStatic.getSizeLeavingNodes().isEmpty()); + iterationsStatic.getSizeEnteringNodes().forEach((id, size) -> { + assertNotNull(id); + assertTrue(size > 0); }); + iterationsStatic.getSizeLeavingNodes().forEach((id, size) -> { + assertNotNull(id); + assertTrue(size > 0); + }); + Long enteringDataSum = getTotalMovedData(iterationsStatic.getSizeEnteringNodes()); + Long leavingDataSum = getTotalMovedData(iterationsStatic.getSizeLeavingNodes()); + assertEquals(enteringDataSum, leavingDataSum); + } + + private void verifyCompletedIteration( + ContainerBalancerTaskIterationStatusInfo iteration, + Integer expectedIterationNumber + ) { + assertEquals(expectedIterationNumber, iteration.getIterationNumber()); + assertEquals("ITERATION_COMPLETED", iteration.getIterationResult()); + assertNotNull(iteration.getIterationDuration()); + assertTrue(iteration.getContainerMovesScheduled() > 0); + assertTrue(iteration.getContainerMovesCompleted() > 0); + assertEquals(0, iteration.getContainerMovesFailed()); + assertEquals(0, iteration.getContainerMovesTimeout()); + assertTrue(iteration.getSizeScheduledForMove() > 0); + assertTrue(iteration.getDataSizeMoved() > 0); + assertFalse(iteration.getSizeEnteringNodes().isEmpty()); + assertFalse(iteration.getSizeLeavingNodes().isEmpty()); + iteration.getSizeEnteringNodes().forEach((id, size) -> { + assertNotNull(id); + assertTrue(size > 0); + }); + iteration.getSizeLeavingNodes().forEach((id, size) -> { + assertNotNull(id); + assertTrue(size > 0); + }); + Long enteringDataSum = getTotalMovedData(iteration.getSizeEnteringNodes()); + Long leavingDataSum = getTotalMovedData(iteration.getSizeLeavingNodes()); + assertEquals(enteringDataSum, leavingDataSum); + } + + private void verifyStartedEmptyIteration( + ContainerBalancerTaskIterationStatusInfo iteration + ) { + assertEquals(1, iteration.getIterationNumber()); + assertNull(iteration.getIterationResult()); + assertNotNull(iteration.getIterationDuration()); + assertEquals(0, iteration.getContainerMovesScheduled()); + assertEquals(0, iteration.getContainerMovesCompleted()); + assertEquals(0, iteration.getContainerMovesFailed()); + assertEquals(0, iteration.getContainerMovesTimeout()); + assertEquals(0, iteration.getSizeScheduledForMove()); + assertEquals(0, iteration.getDataSizeMoved()); + assertTrue(iteration.getSizeEnteringNodes().isEmpty()); + assertTrue(iteration.getSizeLeavingNodes().isEmpty()); + } + private static Long getTotalMovedData(Map iteration) { + return iteration.values().stream().reduce(0L, ArithmeticUtils::addAndCheck); } /** diff --git a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerBalancerStatusSubcommand.java b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerBalancerStatusSubcommand.java index e58074bf140..9d7c270c962 100644 --- a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerBalancerStatusSubcommand.java +++ b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerBalancerStatusSubcommand.java @@ -19,9 +19,9 @@ import org.apache.hadoop.hdds.cli.HddsVersionProvider; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; -import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ContainerBalancerStatusInfo; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ContainerBalancerStatusInfoProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ContainerBalancerStatusInfoResponseProto; -import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ContainerBalancerTaskIterationStatusInfo; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ContainerBalancerTaskIterationStatusInfoProto; import org.apache.hadoop.hdds.scm.client.ScmClient; import org.apache.hadoop.ozone.OzoneConsts; import picocli.CommandLine; @@ -31,10 +31,14 @@ import java.time.Duration; import java.time.Instant; import java.time.LocalDateTime; +import java.time.OffsetDateTime; import java.time.ZoneId; import java.util.List; import java.util.stream.Collectors; +import static org.apache.hadoop.hdds.util.DurationUtil.getPrettyDuration; +import static org.apache.hadoop.util.StringUtils.byteDesc; + /** * Handler to query status of container balancer. */ @@ -58,27 +62,42 @@ public class ContainerBalancerStatusSubcommand extends ScmSubcommand { public void execute(ScmClient scmClient) throws IOException { ContainerBalancerStatusInfoResponseProto response = scmClient.getContainerBalancerStatusInfo(); boolean isRunning = response.getIsRunning(); - ContainerBalancerStatusInfo balancerStatusInfo = response.getContainerBalancerStatusInfo(); + ContainerBalancerStatusInfoProto balancerStatusInfo = response.getContainerBalancerStatusInfo(); if (isRunning) { + Instant startedAtInstant = Instant.ofEpochSecond(balancerStatusInfo.getStartedAt()); LocalDateTime dateTime = - LocalDateTime.ofInstant(Instant.ofEpochSecond(balancerStatusInfo.getStartedAt()), ZoneId.systemDefault()); + LocalDateTime.ofInstant(startedAtInstant, ZoneId.systemDefault()); System.out.println("ContainerBalancer is Running."); if (verbose) { - System.out.printf("Started at: %s %s%n%n", dateTime.toLocalDate(), dateTime.toLocalTime()); + System.out.printf("Started at: %s %s%n", dateTime.toLocalDate(), dateTime.toLocalTime()); + Duration balancingDuration = Duration.between(startedAtInstant, OffsetDateTime.now()); + System.out.printf("Balancing duration: %s%n%n", getPrettyDuration(balancingDuration)); System.out.println(getConfigurationPrettyString(balancerStatusInfo.getConfiguration())); - List iterationsStatusInfoList + List iterationsStatusInfoList = balancerStatusInfo.getIterationsStatusInfoList(); System.out.println("Current iteration info:"); - System.out.println( - getPrettyIterationStatusInfo(iterationsStatusInfoList.get(iterationsStatusInfoList.size() - 1)) - ); + ContainerBalancerTaskIterationStatusInfoProto currentIterationStatistic = iterationsStatusInfoList.stream() + .filter(it -> it.getIterationResult().isEmpty()) + .findFirst() + .orElse(null); + if (currentIterationStatistic == null) { + System.out.println("-\n"); + } else { + System.out.println( + getPrettyIterationStatusInfo(currentIterationStatistic) + ); + } + if (verboseWithHistory) { System.out.println("Iteration history list:"); System.out.println( - iterationsStatusInfoList.stream().map(this::getPrettyIterationStatusInfo) + iterationsStatusInfoList + .stream() + .filter(it -> !it.getIterationResult().isEmpty()) + .map(this::getPrettyIterationStatusInfo) .collect(Collectors.joining("\n")) ); } @@ -134,21 +153,28 @@ String getConfigurationPrettyString(HddsProtos.ContainerBalancerConfigurationPro configuration.getExcludeDatanodes().isEmpty() ? "None" : configuration.getExcludeDatanodes()); } - private String getPrettyIterationStatusInfo(ContainerBalancerTaskIterationStatusInfo iterationStatusInfo) { + private String getPrettyIterationStatusInfo(ContainerBalancerTaskIterationStatusInfoProto iterationStatusInfo) { int iterationNumber = iterationStatusInfo.getIterationNumber(); String iterationResult = iterationStatusInfo.getIterationResult(); - long sizeScheduledForMove = iterationStatusInfo.getSizeScheduledForMoveGB(); - long dataSizeMovedGB = iterationStatusInfo.getDataSizeMovedGB(); + long iterationDuration = iterationStatusInfo.getIterationDuration(); + long sizeScheduledForMove = iterationStatusInfo.getSizeScheduledForMove(); + long dataSizeMoved = iterationStatusInfo.getDataSizeMoved(); long containerMovesScheduled = iterationStatusInfo.getContainerMovesScheduled(); long containerMovesCompleted = iterationStatusInfo.getContainerMovesCompleted(); long containerMovesFailed = iterationStatusInfo.getContainerMovesFailed(); long containerMovesTimeout = iterationStatusInfo.getContainerMovesTimeout(); - String enteringDataNodeList = iterationStatusInfo.getSizeEnteringNodesGBList() - .stream().map(nodeInfo -> nodeInfo.getUuid() + " <- " + nodeInfo.getDataVolumeGB() + "\n") + String enteringDataNodeList = iterationStatusInfo.getSizeEnteringNodesList() + .stream().map(nodeInfo -> nodeInfo.getUuid() + " <- " + byteDesc(nodeInfo.getDataVolume()) + "\n") .collect(Collectors.joining()); - String leavingDataNodeList = iterationStatusInfo.getSizeLeavingNodesGBList() - .stream().map(nodeInfo -> nodeInfo.getUuid() + " -> " + nodeInfo.getDataVolumeGB() + "\n") + if (enteringDataNodeList.isEmpty()) { + enteringDataNodeList = " -\n"; + } + String leavingDataNodeList = iterationStatusInfo.getSizeLeavingNodesList() + .stream().map(nodeInfo -> nodeInfo.getUuid() + " -> " + byteDesc(nodeInfo.getDataVolume()) + "\n") .collect(Collectors.joining()); + if (leavingDataNodeList.isEmpty()) { + leavingDataNodeList = " -\n"; + } return String.format( "%-50s %s%n" + "%-50s %s%n" + @@ -159,14 +185,16 @@ private String getPrettyIterationStatusInfo(ContainerBalancerTaskIterationStatus "%-50s %s%n" + "%-50s %s%n" + "%-50s %s%n" + + "%-50s %s%n" + "%-50s %n%s" + "%-50s %n%s", "Key", "Value", - "Iteration number", iterationNumber, + "Iteration number", iterationNumber == 0 ? "-" : iterationNumber, + "Iteration duration", getPrettyDuration(Duration.ofSeconds(iterationDuration)), "Iteration result", - iterationResult.isEmpty() ? "IN_PROGRESS" : iterationResult, - "Size scheduled to move", sizeScheduledForMove, - "Moved data size", dataSizeMovedGB, + iterationResult.isEmpty() ? "-" : iterationResult, + "Size scheduled to move", byteDesc(sizeScheduledForMove), + "Moved data size", byteDesc(dataSizeMoved), "Scheduled to move containers", containerMovesScheduled, "Already moved containers", containerMovesCompleted, "Failed to move containers", containerMovesFailed, @@ -174,5 +202,6 @@ private String getPrettyIterationStatusInfo(ContainerBalancerTaskIterationStatus "Entered data to nodes", enteringDataNodeList, "Exited data from nodes", leavingDataNodeList); } + } diff --git a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/util/DurationUtil.java b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/util/DurationUtil.java new file mode 100644 index 00000000000..7b2ded9b13d --- /dev/null +++ b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/util/DurationUtil.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdds.util; + +import java.time.Duration; + +import static java.lang.String.format; + +/** + * Pretty duration string representation. + */ +public final class DurationUtil { + + private DurationUtil() { + } + + /** + * Modify duration to string view. E.x. 1h 30m 45s, 2m 30s, 30s. + * + * @param duration duration + * @return duration in string format + */ + public static String getPrettyDuration(Duration duration) { + long hours = duration.toHours(); + long minutes = duration.getSeconds() / 60 % 60; + long seconds = duration.getSeconds() % 60; + if (hours > 0) { + return format("%dh %dm %ds", hours, minutes, seconds); + } else if (minutes > 0) { + return format("%dm %ds", minutes, seconds); + } else if (seconds >= 0) { + return format("%ds", seconds); + } else { + throw new IllegalStateException("Provided duration is incorrect: " + duration); + } + } +} diff --git a/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/util/package-info.java b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/util/package-info.java new file mode 100644 index 00000000000..6dd25c12c53 --- /dev/null +++ b/hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/util/package-info.java @@ -0,0 +1,22 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *

+ */ +/** + * SCM related cli utils. + */ +package org.apache.hadoop.hdds.util; diff --git a/hadoop-hdds/tools/src/test/java/org/apache/hadoop/hdds/scm/cli/datanode/TestContainerBalancerSubCommand.java b/hadoop-hdds/tools/src/test/java/org/apache/hadoop/hdds/scm/cli/datanode/TestContainerBalancerSubCommand.java index 41b419d2326..bdce0f5d707 100644 --- a/hadoop-hdds/tools/src/test/java/org/apache/hadoop/hdds/scm/cli/datanode/TestContainerBalancerSubCommand.java +++ b/hadoop-hdds/tools/src/test/java/org/apache/hadoop/hdds/scm/cli/datanode/TestContainerBalancerSubCommand.java @@ -18,7 +18,7 @@ package org.apache.hadoop.hdds.scm.cli.datanode; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos; -import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ContainerBalancerStatusInfo; +import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ContainerBalancerStatusInfoProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ContainerBalancerStatusInfoResponseProto; import org.apache.hadoop.hdds.scm.cli.ContainerBalancerStartSubcommand; import org.apache.hadoop.hdds.scm.cli.ContainerBalancerStatusSubcommand; @@ -28,6 +28,7 @@ import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; +import picocli.CommandLine; import java.io.ByteArrayOutputStream; import java.io.IOException; @@ -39,6 +40,8 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; +import static org.apache.hadoop.ozone.OzoneConsts.GB; +import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; @@ -58,175 +61,393 @@ class TestContainerBalancerSubCommand { private ContainerBalancerStartSubcommand startCmd; private ContainerBalancerStatusSubcommand statusCmd; - @BeforeEach - public void setup() throws UnsupportedEncodingException { - stopCmd = new ContainerBalancerStopSubcommand(); - startCmd = new ContainerBalancerStartSubcommand(); - statusCmd = new ContainerBalancerStatusSubcommand(); - System.setOut(new PrintStream(outContent, false, DEFAULT_ENCODING)); - System.setErr(new PrintStream(errContent, false, DEFAULT_ENCODING)); - } - - @AfterEach - public void tearDown() { - System.setOut(originalOut); - System.setErr(originalErr); - } - - @Test - public void testContainerBalancerStatusInfoSubcommandRunning() - throws IOException { - ScmClient scmClient = mock(ScmClient.class); - - ContainerBalancerConfiguration config = new ContainerBalancerConfiguration(); - config.setThreshold(10); - config.setMaxDatanodesPercentageToInvolvePerIteration(20); - config.setMaxSizeToMovePerIteration(53687091200L); - config.setMaxSizeEnteringTarget(27917287424L); - config.setMaxSizeLeavingSource(27917287424L); - config.setIterations(2); - config.setExcludeNodes(""); - config.setMoveTimeout(3900000); - config.setMoveReplicationTimeout(3000000); - config.setBalancingInterval(0); - config.setIncludeNodes(""); - config.setExcludeNodes(""); - config.setNetworkTopologyEnable(false); - config.setTriggerDuEnable(false); - - StorageContainerLocationProtocolProtos.ContainerBalancerTaskIterationStatusInfo iteration0StatusInfo = - StorageContainerLocationProtocolProtos.ContainerBalancerTaskIterationStatusInfo.newBuilder() - .setIterationNumber(0) + private static ContainerBalancerStatusInfoResponseProto getContainerBalancerStatusInfoResponseProto( + ContainerBalancerConfiguration config) { + StorageContainerLocationProtocolProtos.ContainerBalancerTaskIterationStatusInfoProto iteration1StatusInfo = + StorageContainerLocationProtocolProtos.ContainerBalancerTaskIterationStatusInfoProto.newBuilder() + .setIterationNumber(1) .setIterationResult("ITERATION_COMPLETED") - .setSizeScheduledForMoveGB(48) - .setDataSizeMovedGB(48) + .setIterationDuration(400L) + .setSizeScheduledForMove(54 * GB) + .setDataSizeMoved(54 * GB) .setContainerMovesScheduled(11) .setContainerMovesCompleted(11) .setContainerMovesFailed(0) .setContainerMovesTimeout(0) - .addSizeEnteringNodesGB( - StorageContainerLocationProtocolProtos.NodeTransferInfo.newBuilder() + .addSizeEnteringNodes( + StorageContainerLocationProtocolProtos.NodeTransferInfoProto.newBuilder() .setUuid("80f6bc27-e6f3-493e-b1f4-25f810ad960d") - .setDataVolumeGB(27) + .setDataVolume(28 * GB) .build() ) - .addSizeEnteringNodesGB( - StorageContainerLocationProtocolProtos.NodeTransferInfo.newBuilder() + .addSizeEnteringNodes( + StorageContainerLocationProtocolProtos.NodeTransferInfoProto.newBuilder() .setUuid("701ca98e-aa1a-4b36-b817-e28ed634bba6") - .setDataVolumeGB(23L) + .setDataVolume(26 * GB) .build() ) - .addSizeLeavingNodesGB( - StorageContainerLocationProtocolProtos.NodeTransferInfo.newBuilder() + .addSizeLeavingNodes( + StorageContainerLocationProtocolProtos.NodeTransferInfoProto.newBuilder() .setUuid("b8b9c511-c30f-4933-8938-2f272e307070") - .setDataVolumeGB(24L) + .setDataVolume(25 * GB) .build() ) - .addSizeLeavingNodesGB( - StorageContainerLocationProtocolProtos.NodeTransferInfo.newBuilder() + .addSizeLeavingNodes( + StorageContainerLocationProtocolProtos.NodeTransferInfoProto.newBuilder() .setUuid("7bd99815-47e7-4015-bc61-ca6ef6dfd130") - .setDataVolumeGB(26L) + .setDataVolume(29 * GB) .build() ) .build(); - StorageContainerLocationProtocolProtos.ContainerBalancerTaskIterationStatusInfo iteration1StatusInfo = - StorageContainerLocationProtocolProtos.ContainerBalancerTaskIterationStatusInfo.newBuilder() - .setIterationNumber(1) + StorageContainerLocationProtocolProtos.ContainerBalancerTaskIterationStatusInfoProto iteration2StatusInfo = + StorageContainerLocationProtocolProtos.ContainerBalancerTaskIterationStatusInfoProto.newBuilder() + .setIterationNumber(2) .setIterationResult("ITERATION_COMPLETED") - .setSizeScheduledForMoveGB(48) - .setDataSizeMovedGB(48) - .setContainerMovesScheduled(11) - .setContainerMovesCompleted(11) + .setIterationDuration(300L) + .setSizeScheduledForMove(30 * GB) + .setDataSizeMoved(30 * GB) + .setContainerMovesScheduled(8) + .setContainerMovesCompleted(8) .setContainerMovesFailed(0) .setContainerMovesTimeout(0) - .addSizeEnteringNodesGB( - StorageContainerLocationProtocolProtos.NodeTransferInfo.newBuilder() + .addSizeEnteringNodes( + StorageContainerLocationProtocolProtos.NodeTransferInfoProto.newBuilder() .setUuid("80f6bc27-e6f3-493e-b1f4-25f810ad960d") - .setDataVolumeGB(27L) + .setDataVolume(20 * GB) .build() ) - .addSizeEnteringNodesGB( - StorageContainerLocationProtocolProtos.NodeTransferInfo.newBuilder() + .addSizeEnteringNodes( + StorageContainerLocationProtocolProtos.NodeTransferInfoProto.newBuilder() .setUuid("701ca98e-aa1a-4b36-b817-e28ed634bba6") - .setDataVolumeGB(23L) + .setDataVolume(10 * GB) .build() ) - .addSizeLeavingNodesGB( - StorageContainerLocationProtocolProtos.NodeTransferInfo.newBuilder() + .addSizeLeavingNodes( + StorageContainerLocationProtocolProtos.NodeTransferInfoProto.newBuilder() .setUuid("b8b9c511-c30f-4933-8938-2f272e307070") - .setDataVolumeGB(24L) + .setDataVolume(15 * GB) .build() ) - .addSizeLeavingNodesGB( - StorageContainerLocationProtocolProtos.NodeTransferInfo.newBuilder() + .addSizeLeavingNodes( + StorageContainerLocationProtocolProtos.NodeTransferInfoProto.newBuilder() .setUuid("7bd99815-47e7-4015-bc61-ca6ef6dfd130") - .setDataVolumeGB(26L) + .setDataVolume(15 * GB) .build() ) .build(); - StorageContainerLocationProtocolProtos.ContainerBalancerTaskIterationStatusInfo iteration2StatusInfo = - StorageContainerLocationProtocolProtos.ContainerBalancerTaskIterationStatusInfo.newBuilder() - .setIterationNumber(1) + StorageContainerLocationProtocolProtos.ContainerBalancerTaskIterationStatusInfoProto iteration3StatusInfo = + StorageContainerLocationProtocolProtos.ContainerBalancerTaskIterationStatusInfoProto.newBuilder() + .setIterationNumber(3) .setIterationResult("") - .setSizeScheduledForMoveGB(48) - .setDataSizeMovedGB(48) - .setContainerMovesScheduled(11) - .setContainerMovesCompleted(11) + .setIterationDuration(370L) + .setSizeScheduledForMove(48 * GB) + .setDataSizeMoved(48 * GB) + .setContainerMovesScheduled(5) + .setContainerMovesCompleted(5) .setContainerMovesFailed(0) .setContainerMovesTimeout(0) - .addSizeEnteringNodesGB( - StorageContainerLocationProtocolProtos.NodeTransferInfo.newBuilder() + .addSizeEnteringNodes( + StorageContainerLocationProtocolProtos.NodeTransferInfoProto.newBuilder() .setUuid("80f6bc27-e6f3-493e-b1f4-25f810ad960d") - .setDataVolumeGB(27L) + .setDataVolume(20 * GB) .build() ) - .addSizeEnteringNodesGB( - StorageContainerLocationProtocolProtos.NodeTransferInfo.newBuilder() + .addSizeEnteringNodes( + StorageContainerLocationProtocolProtos.NodeTransferInfoProto.newBuilder() .setUuid("701ca98e-aa1a-4b36-b817-e28ed634bba6") - .setDataVolumeGB(23L) + .setDataVolume(28 * GB) .build() ) - .addSizeLeavingNodesGB( - StorageContainerLocationProtocolProtos.NodeTransferInfo.newBuilder() + .addSizeLeavingNodes( + StorageContainerLocationProtocolProtos.NodeTransferInfoProto.newBuilder() .setUuid("b8b9c511-c30f-4933-8938-2f272e307070") - .setDataVolumeGB(24L) + .setDataVolume(30 * GB) .build() ) - .addSizeLeavingNodesGB( - StorageContainerLocationProtocolProtos.NodeTransferInfo.newBuilder() + .addSizeLeavingNodes( + StorageContainerLocationProtocolProtos.NodeTransferInfoProto.newBuilder() .setUuid("7bd99815-47e7-4015-bc61-ca6ef6dfd130") - .setDataVolumeGB(26L) + .setDataVolume(18 * GB) .build() ) .build(); - ContainerBalancerStatusInfoResponseProto statusInfoResponseProto = - ContainerBalancerStatusInfoResponseProto.newBuilder() + return ContainerBalancerStatusInfoResponseProto.newBuilder() .setIsRunning(true) - .setContainerBalancerStatusInfo(ContainerBalancerStatusInfo.newBuilder() + .setContainerBalancerStatusInfo(ContainerBalancerStatusInfoProto.newBuilder() .setStartedAt(OffsetDateTime.now().toEpochSecond()) .setConfiguration(config.toProtobufBuilder().setShouldRun(true)) .addAllIterationsStatusInfo( - Arrays.asList(iteration0StatusInfo, iteration1StatusInfo, iteration2StatusInfo) + Arrays.asList(iteration1StatusInfo, iteration2StatusInfo, iteration3StatusInfo) ) ) .build(); + } + + private static ContainerBalancerConfiguration getContainerBalancerConfiguration() { + ContainerBalancerConfiguration config = new ContainerBalancerConfiguration(); + config.setThreshold(10); + config.setMaxDatanodesPercentageToInvolvePerIteration(20); + config.setMaxSizeToMovePerIteration(53687091200L); + config.setMaxSizeEnteringTarget(27917287424L); + config.setMaxSizeLeavingSource(27917287424L); + config.setIterations(3); + config.setExcludeNodes(""); + config.setMoveTimeout(3900000); + config.setMoveReplicationTimeout(3000000); + config.setBalancingInterval(0); + config.setIncludeNodes(""); + config.setExcludeNodes(""); + config.setNetworkTopologyEnable(false); + config.setTriggerDuEnable(false); + return config; + } + + @BeforeEach + public void setup() throws UnsupportedEncodingException { + stopCmd = new ContainerBalancerStopSubcommand(); + startCmd = new ContainerBalancerStartSubcommand(); + statusCmd = new ContainerBalancerStatusSubcommand(); + System.setOut(new PrintStream(outContent, false, DEFAULT_ENCODING)); + System.setErr(new PrintStream(errContent, false, DEFAULT_ENCODING)); + } + + @AfterEach + public void tearDown() { + System.setOut(originalOut); + System.setErr(originalErr); + } + + @Test + void testContainerBalancerStatusInfoSubcommandRunningWithoutFlags() + throws IOException { + ScmClient scmClient = mock(ScmClient.class); + + ContainerBalancerConfiguration config = + getContainerBalancerConfiguration(); + + ContainerBalancerStatusInfoResponseProto + statusInfoResponseProto = getContainerBalancerStatusInfoResponseProto(config); //test status is running when(scmClient.getContainerBalancerStatusInfo()).thenReturn(statusInfoResponseProto); - statusCmd.execute(scmClient); Pattern p = Pattern.compile( "^ContainerBalancer\\sis\\sRunning."); - Matcher m = p.matcher(outContent.toString(DEFAULT_ENCODING)); + String output = outContent.toString(DEFAULT_ENCODING); + Matcher m = p.matcher(output); assertTrue(m.find()); + + String balancerConfigOutput = + "Container Balancer Configuration values:\n" + + "Key Value\n" + + "Threshold 10.0\n" + + "Max Datanodes to Involve per Iteration(percent) 20\n" + + "Max Size to Move per Iteration 0GB\n" + + "Max Size Entering Target per Iteration 26GB\n" + + "Max Size Leaving Source per Iteration 26GB\n" + + "Number of Iterations 3\n" + + "Time Limit for Single Container's Movement 65min\n" + + "Time Limit for Single Container's Replication 50min\n" + + "Interval between each Iteration 0min\n" + + "Whether to Enable Network Topology false\n" + + "Whether to Trigger Refresh Datanode Usage Info false\n" + + "Container IDs to Exclude from Balancing None\n" + + "Datanodes Specified to be Balanced None\n" + + "Datanodes Excluded from Balancing None"; + assertFalse(output.contains(balancerConfigOutput)); + + String currentIterationOutput = + "Current iteration info:\n" + + "Key Value\n" + + "Iteration number 3\n" + + "Iteration duration 1h 6m 40s\n" + + "Iteration result IN_PROGRESS\n" + + "Size scheduled to move 48 GB\n" + + "Moved data size 48 GB\n" + + "Scheduled to move containers 11\n" + + "Already moved containers 11\n" + + "Failed to move containers 0\n" + + "Failed to move containers by timeout 0\n" + + "Entered data to nodes \n" + + "80f6bc27-e6f3-493e-b1f4-25f810ad960d <- 20 GB\n" + + "701ca98e-aa1a-4b36-b817-e28ed634bba6 <- 28 GB\n" + + "Exited data from nodes \n" + + "b8b9c511-c30f-4933-8938-2f272e307070 -> 30 GB\n" + + "7bd99815-47e7-4015-bc61-ca6ef6dfd130 -> 18 GB"; + assertFalse(output.contains(currentIterationOutput)); + + assertFalse(output.contains("Iteration history list:")); } @Test - public void testContainerBalancerStatusInfoSubcommandRunningOnStoppedBalancer() + void testContainerBalancerStatusInfoSubcommandVerboseHistory() throws IOException { ScmClient scmClient = mock(ScmClient.class); + ContainerBalancerConfiguration config = + getContainerBalancerConfiguration(); + + ContainerBalancerStatusInfoResponseProto + statusInfoResponseProto = getContainerBalancerStatusInfoResponseProto(config); //test status is running + when(scmClient.getContainerBalancerStatusInfo()).thenReturn(statusInfoResponseProto); + CommandLine c = new CommandLine(statusCmd); + c.parseArgs("--verbose", "--history"); + statusCmd.execute(scmClient); + String output = outContent.toString(DEFAULT_ENCODING); + Pattern p = Pattern.compile( + "^ContainerBalancer\\sis\\sRunning.$", Pattern.MULTILINE); + Matcher m = p.matcher(output); + assertTrue(m.find()); + + p = Pattern.compile( + "^Started at: (\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2})$", Pattern.MULTILINE); + m = p.matcher(output); + assertTrue(m.find()); + + p = Pattern.compile( + "^Balancing duration: \\d{1}s$", Pattern.MULTILINE); + m = p.matcher(output); + assertTrue(m.find()); + + String balancerConfigOutput = + "Container Balancer Configuration values:\n" + + "Key Value\n" + + "Threshold 10.0\n" + + "Max Datanodes to Involve per Iteration(percent) 20\n" + + "Max Size to Move per Iteration 0GB\n" + + "Max Size Entering Target per Iteration 26GB\n" + + "Max Size Leaving Source per Iteration 26GB\n" + + "Number of Iterations 3\n" + + "Time Limit for Single Container's Movement 65min\n" + + "Time Limit for Single Container's Replication 50min\n" + + "Interval between each Iteration 0min\n" + + "Whether to Enable Network Topology false\n" + + "Whether to Trigger Refresh Datanode Usage Info false\n" + + "Container IDs to Exclude from Balancing None\n" + + "Datanodes Specified to be Balanced None\n" + + "Datanodes Excluded from Balancing None"; + assertTrue(output.contains(balancerConfigOutput)); + + assertTrue(output.contains("Iteration history list:")); + String firstHistoryIterationOutput = + "Key Value\n" + + "Iteration number 3\n" + + "Iteration duration 6m 10s\n" + + "Iteration result -\n" + + "Size scheduled to move 48 GB\n" + + "Moved data size 48 GB\n" + + "Scheduled to move containers 5\n" + + "Already moved containers 5\n" + + "Failed to move containers 0\n" + + "Failed to move containers by timeout 0\n" + + "Entered data to nodes \n" + + "80f6bc27-e6f3-493e-b1f4-25f810ad960d <- 20 GB\n" + + "701ca98e-aa1a-4b36-b817-e28ed634bba6 <- 28 GB\n" + + "Exited data from nodes \n" + + "b8b9c511-c30f-4933-8938-2f272e307070 -> 30 GB\n" + + "7bd99815-47e7-4015-bc61-ca6ef6dfd130 -> 18 GB"; + assertTrue(output.contains(firstHistoryIterationOutput)); + + String secondHistoryIterationOutput = + "Key Value\n" + + "Iteration number 2\n" + + "Iteration duration 5m 0s\n" + + "Iteration result ITERATION_COMPLETED\n" + + "Size scheduled to move 30 GB\n" + + "Moved data size 30 GB\n" + + "Scheduled to move containers 8\n" + + "Already moved containers 8\n" + + "Failed to move containers 0\n" + + "Failed to move containers by timeout 0\n" + + "Entered data to nodes \n" + + "80f6bc27-e6f3-493e-b1f4-25f810ad960d <- 20 GB\n" + + "701ca98e-aa1a-4b36-b817-e28ed634bba6 <- 10 GB\n" + + "Exited data from nodes \n" + + "b8b9c511-c30f-4933-8938-2f272e307070 -> 15 GB\n" + + "7bd99815-47e7-4015-bc61-ca6ef6dfd130 -> 15 GB"; + assertTrue(output.contains(secondHistoryIterationOutput)); + } + + @Test + void testContainerBalancerStatusInfoSubcommandVerbose() + throws IOException { + ScmClient scmClient = mock(ScmClient.class); + + ContainerBalancerConfiguration config = + getContainerBalancerConfiguration(); + + ContainerBalancerStatusInfoResponseProto + statusInfoResponseProto = getContainerBalancerStatusInfoResponseProto(config); + //test status is running + when(scmClient.getContainerBalancerStatusInfo()).thenReturn(statusInfoResponseProto); + CommandLine c = new CommandLine(statusCmd); + c.parseArgs("--verbose"); + statusCmd.execute(scmClient); + String output = outContent.toString(DEFAULT_ENCODING); + Pattern p = Pattern.compile( + "^ContainerBalancer\\sis\\sRunning.$", Pattern.MULTILINE); + Matcher m = p.matcher(output); + assertTrue(m.find()); + + p = Pattern.compile( + "^Started at: (\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2})$", Pattern.MULTILINE); + m = p.matcher(output); + assertTrue(m.find()); + + p = Pattern.compile( + "^Balancing duration: \\d{1}s$", Pattern.MULTILINE); + m = p.matcher(output); + assertTrue(m.find()); + + String balancerConfigOutput = + "Container Balancer Configuration values:\n" + + "Key Value\n" + + "Threshold 10.0\n" + + "Max Datanodes to Involve per Iteration(percent) 20\n" + + "Max Size to Move per Iteration 0GB\n" + + "Max Size Entering Target per Iteration 26GB\n" + + "Max Size Leaving Source per Iteration 26GB\n" + + "Number of Iterations 3\n" + + "Time Limit for Single Container's Movement 65min\n" + + "Time Limit for Single Container's Replication 50min\n" + + "Interval between each Iteration 0min\n" + + "Whether to Enable Network Topology false\n" + + "Whether to Trigger Refresh Datanode Usage Info false\n" + + "Container IDs to Exclude from Balancing None\n" + + "Datanodes Specified to be Balanced None\n" + + "Datanodes Excluded from Balancing None"; + assertTrue(output.contains(balancerConfigOutput)); + + String currentIterationOutput = + "Current iteration info:\n" + + "Key Value\n" + + "Iteration number 3\n" + + "Iteration duration 6m 10s\n" + + "Iteration result -\n" + + "Size scheduled to move 48 GB\n" + + "Moved data size 48 GB\n" + + "Scheduled to move containers 5\n" + + "Already moved containers 5\n" + + "Failed to move containers 0\n" + + "Failed to move containers by timeout 0\n" + + "Entered data to nodes \n" + + "80f6bc27-e6f3-493e-b1f4-25f810ad960d <- 20 GB\n" + + "701ca98e-aa1a-4b36-b817-e28ed634bba6 <- 28 GB\n" + + "Exited data from nodes \n" + + "b8b9c511-c30f-4933-8938-2f272e307070 -> 30 GB\n" + + "7bd99815-47e7-4015-bc61-ca6ef6dfd130 -> 18 GB"; + assertTrue(output.contains(currentIterationOutput)); + + assertFalse(output.contains("Iteration history list:")); + } + + @Test + void testContainerBalancerStatusInfoSubcommandRunningOnStoppedBalancer() + throws IOException { + ScmClient scmClient = mock(ScmClient.class); + + //test status is not running when(scmClient.getContainerBalancerStatusInfo()).thenReturn( ContainerBalancerStatusInfoResponseProto.newBuilder() .setIsRunning(false) diff --git a/hadoop-hdds/tools/src/test/java/org/apache/hadoop/hdds/util/TestDurationUtil.java b/hadoop-hdds/tools/src/test/java/org/apache/hadoop/hdds/util/TestDurationUtil.java new file mode 100644 index 00000000000..7b0a9548639 --- /dev/null +++ b/hadoop-hdds/tools/src/test/java/org/apache/hadoop/hdds/util/TestDurationUtil.java @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdds.util; + +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +import java.time.Duration; +import java.util.Arrays; +import java.util.Collection; +import java.util.stream.Stream; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.params.provider.Arguments.arguments; + +class TestDurationUtil { + + private static Stream paramsForPositiveCases() { + return Stream.of( + arguments( + "0s", + Duration.ZERO + ), + arguments( + "2562047788015215h 30m 7s", + Duration.ofSeconds(Long.MAX_VALUE) + ), + arguments( + "1s", + Duration.ofSeconds(1) + ), + arguments( + "30s", + Duration.ofSeconds(30) + ), + arguments( + "1m 0s", + Duration.ofMinutes(1) + ), + arguments( + "2m 30s", + Duration.ofMinutes(2).plusSeconds(30) + ), + arguments( + "1h 30m 45s", + Duration.ofHours(1).plusMinutes(30).plusSeconds(45) + ), + arguments( + "24h 0m 0s", + Duration.ofDays(1) + ), + arguments( + "48h 0m 0s", + Duration.ofDays(2) + ) + ); + } + + private static Collection paramsForNegativeCases() { + return Arrays.asList(Duration.ofSeconds(-1L), Duration.ofSeconds(Long.MIN_VALUE)); + } + + @ParameterizedTest + @MethodSource("paramsForPositiveCases") + void testDuration(String expected, Duration actual) { + assertEquals(expected, DurationUtil.getPrettyDuration(actual)); + } + + @ParameterizedTest + @MethodSource("paramsForNegativeCases") + void testDuration(Duration param) { + assertThrows(IllegalStateException.class, () -> DurationUtil.getPrettyDuration(param)); + } +} + diff --git a/hadoop-ozone/dist/src/main/compose/ozone-balancer/docker-config b/hadoop-ozone/dist/src/main/compose/ozone-balancer/docker-config index f4866c4240d..6e0781a1d9e 100644 --- a/hadoop-ozone/dist/src/main/compose/ozone-balancer/docker-config +++ b/hadoop-ozone/dist/src/main/compose/ozone-balancer/docker-config @@ -54,7 +54,8 @@ OZONE-SITE.XML_ozone.om.s3.grpc.server_enabled=true OZONE-SITE.XML_ozone.recon.db.dir=/data/metadata/recon OZONE-SITE.XML_dfs.container.ratis.datastream.enabled=true OZONE-SITE.XML_ozone.http.basedir=/tmp/ozone_http - +OZONE-SITE.XML_hdds.container.balancer.balancing.iteration.interval=25s +OZONE-SITE.XML_hdds.container.balancer.trigger.du.before.move.enable=false OZONE_CONF_DIR=/etc/hadoop OZONE_LOG_DIR=/var/log/hadoop diff --git a/hadoop-ozone/dist/src/main/smoketest/balancer/testBalancer.robot b/hadoop-ozone/dist/src/main/smoketest/balancer/testBalancer.robot index 343e4e68faf..608fd27f8e6 100644 --- a/hadoop-ozone/dist/src/main/smoketest/balancer/testBalancer.robot +++ b/hadoop-ozone/dist/src/main/smoketest/balancer/testBalancer.robot @@ -62,7 +62,7 @@ Datanode Recommission is Finished Should Not Contain ${result} ENTERING_MAINTENANCE Run Container Balancer - ${result} = Execute ozone admin containerbalancer start -t 0.1 -d 100 -i 1 + ${result} = Execute ozone admin containerbalancer start -t 0.1 -d 100 -i 3 Should Contain ${result} Container Balancer started successfully. Wait Finish Of Balancing @@ -71,19 +71,27 @@ Wait Finish Of Balancing Sleep 60000ms -Verify Verbose Balancer Status - [arguments] ${output} - +Verify Balancer Iteration + [arguments] ${output} ${number} Should Contain ${output} ContainerBalancer is Running. Should Contain ${output} Started at: Should Contain ${output} Container Balancer Configuration values: - -Verify Balancer Iteration - [arguments] ${output} ${number} ${status} ${containers} - - Should Contain ${output} Iteration number ${number} - Should Contain ${output} Iteration result ${status} - Should Contain ${output} Scheduled to move containers ${containers} + Should Contain ${output} Iteration number ${number} collapse_spaces=True + Should Contain ${output} Scheduled to move containers collapse_spaces=True + Should Contain ${output} Balancing duration: + Should Contain ${output} Iteration duration + Should Contain ${output} Current iteration info: + +Verify Balancer Iteration History + [arguments] ${output} + Should Contain ${output} Iteration history list: + Should Contain X Times ${output} Size scheduled to move 1 collapse_spaces=True + Should Contain X Times ${output} Moved data size 1 collapse_spaces=True + Should Contain X Times ${output} Scheduled to move containers 1 collapse_spaces=True + Should Contain X Times ${output} Already moved containers 1 collapse_spaces=True + Should Contain X Times ${output} Failed to move containers 0 1 collapse_spaces=True + Should Contain X Times ${output} Failed to move containers by timeout 0 1 collapse_spaces=True + Should Contain ${output} Iteration result ITERATION_COMPLETED collapse_spaces=True Run Balancer Status ${result} = Execute ozone admin containerbalancer status @@ -91,15 +99,14 @@ Run Balancer Status Run Balancer Verbose Status ${result} = Execute ozone admin containerbalancer status -v - Verify Verbose Balancer Status ${result} - Verify Balancer Iteration ${result} 1 IN_PROGRESS 3 - Should Contain ${result} Current iteration info: + Verify Balancer Iteration ${result} 1 + Should Contain ${result} Iteration result - collapse_spaces=True + Run Balancer Verbose History Status ${result} = Execute ozone admin containerbalancer status -v --history - Verify Verbose Balancer Status ${result} - Verify Balancer Iteration ${result} 1 IN_PROGRESS 3 - Should Contain ${result} Iteration history list: + Verify Balancer Iteration ${result} 1 + Verify Balancer Iteration History ${result} ContainerBalancer is Not Running ${result} = Execute ozone admin containerbalancer status @@ -170,6 +177,8 @@ Verify Container Balancer for RATIS/EC containers Run Balancer Verbose Status + Sleep 40000ms + Run Balancer Verbose History Status Wait Finish Of Balancing @@ -179,9 +188,4 @@ Verify Container Balancer for RATIS/EC containers #We need to ensure that after balancing, the amount of data recorded on each datanode falls within the following ranges: #{SIZE}*3 < used < {SIZE}*3.5 for RATIS containers, and {SIZE}*0.7 < used < {SIZE}*1.5 for EC containers. Should Be True ${datanodeOzoneUsedBytesInfoAfterContainerBalancing} < ${SIZE} * ${UPPER_LIMIT} - Should Be True ${datanodeOzoneUsedBytesInfoAfterContainerBalancing} > ${SIZE} * ${LOWER_LIMIT} - - - - - + Should Be True ${datanodeOzoneUsedBytesInfoAfterContainerBalancing} > ${SIZE} * ${LOWER_LIMIT} \ No newline at end of file