From 23eba7d9fcc4c979bf02f0872412af92343a1e99 Mon Sep 17 00:00:00 2001 From: Xiaxuan Gao <32374858+MarkGaox@users.noreply.github.com> Date: Tue, 24 Oct 2023 14:42:04 -0700 Subject: [PATCH 01/11] Refactor stoppable check logic for enhanced zone analysis (#2654) Refactor the zone-based stoppable check logic and add support to randomly select zone order for the zone-based stoppable check. --- .../StoppableInstancesSelector.java | 232 ++++++++++++++++++ .../resources/helix/InstancesAccessor.java | 110 +++------ 2 files changed, 261 insertions(+), 81 deletions(-) create mode 100644 helix-rest/src/main/java/org/apache/helix/rest/clusterMaintenanceService/StoppableInstancesSelector.java diff --git a/helix-rest/src/main/java/org/apache/helix/rest/clusterMaintenanceService/StoppableInstancesSelector.java b/helix-rest/src/main/java/org/apache/helix/rest/clusterMaintenanceService/StoppableInstancesSelector.java new file mode 100644 index 0000000000..dafe1ab2d8 --- /dev/null +++ b/helix-rest/src/main/java/org/apache/helix/rest/clusterMaintenanceService/StoppableInstancesSelector.java @@ -0,0 +1,232 @@ +package org.apache.helix.rest.clusterMaintenanceService; + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashSet; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.TreeSet; +import java.util.stream.Collectors; + +import com.fasterxml.jackson.databind.node.ArrayNode; +import com.fasterxml.jackson.databind.node.JsonNodeFactory; +import com.fasterxml.jackson.databind.node.ObjectNode; +import org.apache.commons.lang3.NotImplementedException; +import org.apache.helix.rest.server.json.cluster.ClusterTopology; +import org.apache.helix.rest.server.json.instance.StoppableCheck; + +public class StoppableInstancesSelector { + // This type does not belong to real HealthCheck failed reason. Also, if we add this type + // to HealthCheck enum, it could introduce more unnecessary check step since the InstanceServiceImpl + // loops all the types to do corresponding checks. + private final static String INSTANCE_NOT_EXIST = "HELIX:INSTANCE_NOT_EXIST"; + private String _clusterId; + private List _orderOfZone; + private String _customizedInput; + private ArrayNode _stoppableInstances; + private ObjectNode _failedStoppableInstances; + private MaintenanceManagementService _maintenanceService; + private ClusterTopology _clusterTopology; + + public StoppableInstancesSelector(String clusterId, List orderOfZone, + String customizedInput, ArrayNode stoppableInstances, ObjectNode failedStoppableInstances, + MaintenanceManagementService maintenanceService, ClusterTopology clusterTopology) { + _clusterId = clusterId; + _orderOfZone = orderOfZone; + _customizedInput = customizedInput; + _stoppableInstances = stoppableInstances; + _failedStoppableInstances = failedStoppableInstances; + _maintenanceService = maintenanceService; + _clusterTopology = clusterTopology; + } + + /** + * Evaluates and collects stoppable instances within a specified or determined zone based on the order of zones. 
+ * If _orderOfZone is specified, the method targets the first non-empty zone; otherwise, it targets the zone with + * the highest instance count. The method iterates through instances, performing stoppable checks, and records + * reasons for non-stoppability. + * + * @param instances A list of instance to be evaluated. + * @throws IOException + */ + public void getStoppableInstancesInSingleZone(List instances) throws IOException { + List zoneBasedInstance = + getZoneBasedInstances(instances, _clusterTopology.toZoneMapping()); + Map instancesStoppableChecks = + _maintenanceService.batchGetInstancesStoppableChecks(_clusterId, zoneBasedInstance, + _customizedInput); + for (Map.Entry instanceStoppableCheck : instancesStoppableChecks.entrySet()) { + String instance = instanceStoppableCheck.getKey(); + StoppableCheck stoppableCheck = instanceStoppableCheck.getValue(); + if (!stoppableCheck.isStoppable()) { + ArrayNode failedReasonsNode = _failedStoppableInstances.putArray(instance); + for (String failedReason : stoppableCheck.getFailedChecks()) { + failedReasonsNode.add(JsonNodeFactory.instance.textNode(failedReason)); + } + } else { + _stoppableInstances.add(instance); + } + } + // Adding following logic to check whether instances exist or not. An instance exist could be + // checking following scenario: + // 1. Instance got dropped. (InstanceConfig is gone.) + // 2. Instance name has typo. + + // If we dont add this check, the instance, which does not exist, will be disappeared from + // result since Helix skips instances for instances not in the selected zone. User may get + // confused with the output. 
+ Set nonSelectedInstances = new HashSet<>(instances); + nonSelectedInstances.removeAll(_clusterTopology.getAllInstances()); + for (String nonSelectedInstance : nonSelectedInstances) { + ArrayNode failedReasonsNode = _failedStoppableInstances.putArray(nonSelectedInstance); + failedReasonsNode.add(JsonNodeFactory.instance.textNode(INSTANCE_NOT_EXIST)); + } + } + + public void getStoppableInstancesCrossZones() { + // TODO: Add implementation to enable cross zone stoppable check. + throw new NotImplementedException("Not Implemented"); + } + + /** + * Determines the order of zones. If an order is provided by the user, it will be used directly. + * Otherwise, zones will be ordered by their associated instance count in descending order. + * + * If `random` is true, the order of zones will be randomized regardless of any previous order. + * + * @param random Indicates whether to randomize the order of zones. + */ + public void calculateOrderOfZone(boolean random) { + if (_orderOfZone == null) { + _orderOfZone = + new ArrayList<>(getOrderedZoneToInstancesMap(_clusterTopology.toZoneMapping()).keySet()); + } + + if (_orderOfZone.isEmpty()) { + return; + } + + if (random) { + Collections.shuffle(_orderOfZone); + } + } + + /** + * Get instances belongs to the first zone. If the zone is already empty, Helix will iterate zones + * by order until find the zone contains instances. + * + * The order of zones can directly come from user input. If user did not specify it, Helix will order + * zones by the number of associated instances in descending order. 
+ * + * @param instances + * @param zoneMapping + * @return + */ + private List getZoneBasedInstances(List instances, + Map> zoneMapping) { + if (_orderOfZone.isEmpty()) { + return _orderOfZone; + } + + Set instanceSet = null; + for (String zone : _orderOfZone) { + instanceSet = new TreeSet<>(instances); + Set currentZoneInstanceSet = new HashSet<>(zoneMapping.get(zone)); + instanceSet.retainAll(currentZoneInstanceSet); + if (instanceSet.size() > 0) { + return new ArrayList<>(instanceSet); + } + } + + return Collections.EMPTY_LIST; + } + + /** + * Returns a map from zone to instances set, ordered by the number of instances in each zone + * in descending order. + * + * @param zoneMapping A map from zone to instances set + * @return An ordered map from zone to instances set, with zones having more instances appearing first. + */ + private Map> getOrderedZoneToInstancesMap( + Map> zoneMapping) { + return zoneMapping.entrySet().stream() + .sorted((e1, e2) -> e2.getValue().size() - e1.getValue().size()).collect( + Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue, + (existing, replacement) -> existing, LinkedHashMap::new)); + } + + public static class StoppableInstancesSelectorBuilder { + private String _clusterId; + private List _orderOfZone; + private String _customizedInput; + private ArrayNode _stoppableInstances; + private ObjectNode _failedStoppableInstances; + private MaintenanceManagementService _maintenanceService; + private ClusterTopology _clusterTopology; + + public StoppableInstancesSelectorBuilder setClusterId(String clusterId) { + _clusterId = clusterId; + return this; + } + + public StoppableInstancesSelectorBuilder setOrderOfZone(List orderOfZone) { + _orderOfZone = orderOfZone; + return this; + } + + public StoppableInstancesSelectorBuilder setCustomizedInput(String customizedInput) { + _customizedInput = customizedInput; + return this; + } + + public StoppableInstancesSelectorBuilder setStoppableInstances(ArrayNode stoppableInstances) { + 
_stoppableInstances = stoppableInstances; + return this; + } + + public StoppableInstancesSelectorBuilder setFailedStoppableInstances(ObjectNode failedStoppableInstances) { + _failedStoppableInstances = failedStoppableInstances; + return this; + } + + public StoppableInstancesSelectorBuilder setMaintenanceService( + MaintenanceManagementService maintenanceService) { + _maintenanceService = maintenanceService; + return this; + } + + public StoppableInstancesSelectorBuilder setClusterTopology(ClusterTopology clusterTopology) { + _clusterTopology = clusterTopology; + return this; + } + + public StoppableInstancesSelector build() { + return new StoppableInstancesSelector(_clusterId, _orderOfZone, + _customizedInput, _stoppableInstances, _failedStoppableInstances, _maintenanceService, + _clusterTopology); + } + } +} diff --git a/helix-rest/src/main/java/org/apache/helix/rest/server/resources/helix/InstancesAccessor.java b/helix-rest/src/main/java/org/apache/helix/rest/server/resources/helix/InstancesAccessor.java index 87be72b969..8a21202704 100644 --- a/helix-rest/src/main/java/org/apache/helix/rest/server/resources/helix/InstancesAccessor.java +++ b/helix-rest/src/main/java/org/apache/helix/rest/server/resources/helix/InstancesAccessor.java @@ -20,13 +20,10 @@ */ import java.io.IOException; -import java.util.ArrayList; import java.util.Collections; -import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; -import java.util.TreeSet; import javax.ws.rs.DefaultValue; import javax.ws.rs.GET; import javax.ws.rs.POST; @@ -49,6 +46,7 @@ import org.apache.helix.model.InstanceConfig; import org.apache.helix.rest.clusterMaintenanceService.MaintenanceManagementService; import org.apache.helix.rest.common.HttpConstants; +import org.apache.helix.rest.clusterMaintenanceService.StoppableInstancesSelector; import org.apache.helix.rest.server.filters.ClusterAuth; import org.apache.helix.rest.server.json.cluster.ClusterTopology; import 
org.apache.helix.rest.server.json.instance.StoppableCheck; @@ -63,10 +61,6 @@ @Path("/clusters/{clusterId}/instances") public class InstancesAccessor extends AbstractHelixResource { private final static Logger _logger = LoggerFactory.getLogger(InstancesAccessor.class); - // This type does not belongs to real HealthCheck failed reason. Also if we add this type - // to HealthCheck enum, it could introduce more unnecessary check step since the InstanceServiceImpl - // loops all the types to do corresponding checks. - private final static String INSTANCE_NOT_EXIST = "HELIX:INSTANCE_NOT_EXIST"; public enum InstancesProperties { instances, online, @@ -80,7 +74,8 @@ public enum InstancesProperties { public enum InstanceHealthSelectionBase { instance_based, - zone_based + zone_based, + cross_zone_based } @ResponseMetered(name = HttpConstants.READ_REQUEST) @@ -153,7 +148,8 @@ public Response instancesOperations(@PathParam("clusterId") String clusterId, @QueryParam("command") String command, @QueryParam("continueOnFailures") boolean continueOnFailures, @QueryParam("skipZKRead") boolean skipZKRead, - @QueryParam("skipHealthCheckCategories") String skipHealthCheckCategories, String content) { + @QueryParam("skipHealthCheckCategories") String skipHealthCheckCategories, + @DefaultValue("false") @QueryParam("random") boolean random, String content) { Command cmd; try { cmd = Command.valueOf(command); @@ -198,7 +194,7 @@ public Response instancesOperations(@PathParam("clusterId") String clusterId, break; case stoppable: return batchGetStoppableInstances(clusterId, node, skipZKRead, continueOnFailures, - skipHealthCheckCategorySet); + skipHealthCheckCategorySet, random); default: _logger.error("Unsupported command :" + command); return badRequest("Unsupported command :" + command); @@ -215,8 +211,8 @@ public Response instancesOperations(@PathParam("clusterId") String clusterId, } private Response batchGetStoppableInstances(String clusterId, JsonNode node, boolean skipZKRead, - 
boolean continueOnFailures, Set skipHealthCheckCategories) - throws IOException { + boolean continueOnFailures, Set skipHealthCheckCategories, + boolean random) throws IOException { try { // TODO: Process input data from the content InstancesAccessor.InstanceHealthSelectionBase selectionBase = @@ -237,6 +233,12 @@ private Response batchGetStoppableInstances(String clusterId, JsonNode node, boo orderOfZone = OBJECT_MAPPER.readValue( node.get(InstancesAccessor.InstancesProperties.zone_order.name()).toString(), OBJECT_MAPPER.getTypeFactory().constructCollectionType(List.class, String.class)); + if (!orderOfZone.isEmpty() && random) { + String message = + "Both 'orderOfZone' and 'random' parameters are set. Please specify only one option."; + _logger.error(message); + return badRequest(message); + } } // Prepare output result @@ -253,40 +255,23 @@ private Response batchGetStoppableInstances(String clusterId, JsonNode node, boo ClusterService clusterService = new ClusterServiceImpl(getDataAccssor(clusterId), getConfigAccessor()); ClusterTopology clusterTopology = clusterService.getClusterTopology(clusterId); + StoppableInstancesSelector stoppableInstancesSelector = + new StoppableInstancesSelector.StoppableInstancesSelectorBuilder() + .setClusterId(clusterId) + .setOrderOfZone(orderOfZone) + .setCustomizedInput(customizedInput) + .setStoppableInstances(stoppableInstances) + .setFailedStoppableInstances(failedStoppableInstances) + .setMaintenanceService(maintenanceService) + .setClusterTopology(clusterTopology) + .build(); + stoppableInstancesSelector.calculateOrderOfZone(random); switch (selectionBase) { case zone_based: - List zoneBasedInstance = - getZoneBasedInstances(instances, orderOfZone, clusterTopology.toZoneMapping()); - Map instancesStoppableChecks = - maintenanceService.batchGetInstancesStoppableChecks(clusterId, zoneBasedInstance, - customizedInput); - for (Map.Entry instanceStoppableCheck : instancesStoppableChecks.entrySet()) { - String instance = 
instanceStoppableCheck.getKey(); - StoppableCheck stoppableCheck = instanceStoppableCheck.getValue(); - if (!stoppableCheck.isStoppable()) { - ArrayNode failedReasonsNode = failedStoppableInstances.putArray(instance); - for (String failedReason : stoppableCheck.getFailedChecks()) { - failedReasonsNode.add(JsonNodeFactory.instance.textNode(failedReason)); - } - } else { - stoppableInstances.add(instance); - } - } - // Adding following logic to check whether instances exist or not. An instance exist could be - // checking following scenario: - // 1. Instance got dropped. (InstanceConfig is gone.) - // 2. Instance name has typo. - - // If we dont add this check, the instance, which does not exist, will be disappeared from - // result since Helix skips instances for instances not in the selected zone. User may get - // confused with the output. - Set nonSelectedInstances = new HashSet<>(instances); - nonSelectedInstances.removeAll(clusterTopology.getAllInstances()); - for (String nonSelectedInstance : nonSelectedInstances) { - ArrayNode failedReasonsNode = failedStoppableInstances.putArray(nonSelectedInstance); - failedReasonsNode.add(JsonNodeFactory.instance.textNode(INSTANCE_NOT_EXIST)); - } - + stoppableInstancesSelector.getStoppableInstancesInSingleZone(instances); + break; + case cross_zone_based: + stoppableInstancesSelector.getStoppableInstancesCrossZones(); break; case instance_based: default: @@ -304,41 +289,4 @@ private Response batchGetStoppableInstances(String clusterId, JsonNode node, boo throw e; } } - - /** - * Get instances belongs to the first zone. If the zone is already empty, Helix will iterate zones - * by order until find the zone contains instances. - * - * The order of zones can directly come from user input. If user did not specify it, Helix will order - * zones with alphabetical order. 
- * - * @param instances - * @param orderedZones - * @return - */ - private List getZoneBasedInstances(List instances, List orderedZones, - Map> zoneMapping) { - - // If the orderedZones is not specified, we will order all zones in alphabetical order. - if (orderedZones == null) { - orderedZones = new ArrayList<>(zoneMapping.keySet()); - Collections.sort(orderedZones); - } - - if (orderedZones.isEmpty()) { - return orderedZones; - } - - Set instanceSet = null; - for (String zone : orderedZones) { - instanceSet = new TreeSet<>(instances); - Set currentZoneInstanceSet = new HashSet<>(zoneMapping.get(zone)); - instanceSet.retainAll(currentZoneInstanceSet); - if (instanceSet.size() > 0) { - return new ArrayList<>(instanceSet); - } - } - - return Collections.EMPTY_LIST; - } } From 729a0c5e96c04cdd5e6fa6566fbad0248885c036 Mon Sep 17 00:00:00 2001 From: Xiaxuan Gao <32374858+MarkGaox@users.noreply.github.com> Date: Tue, 31 Oct 2023 15:41:05 -0700 Subject: [PATCH 02/11] Implement the cross-zone-based stoppable check (#2680) Implement the cross-zone-based stoppable check and add to_be_stopped_instances query parameter to the stoppable check API --- .../helix/util/InstanceValidationUtil.java | 58 ++++++- .../util/TestInstanceValidationUtil.java | 77 +++++++++ .../MaintenanceManagementService.java | 40 +++-- .../StoppableInstancesSelector.java | 138 +++++++++++----- .../resources/helix/InstancesAccessor.java | 35 ++-- .../TestMaintenanceManagementService.java | 14 +- .../helix/rest/server/AbstractTestClass.java | 77 ++++++++- .../rest/server/TestInstancesAccessor.java | 155 ++++++++++++++++++ .../TestInstanceValidationUtilInRest.java | 64 ++++++++ 9 files changed, 576 insertions(+), 82 deletions(-) diff --git a/helix-core/src/main/java/org/apache/helix/util/InstanceValidationUtil.java b/helix-core/src/main/java/org/apache/helix/util/InstanceValidationUtil.java index fdbf7dd1a0..5f179e784e 100644 --- a/helix-core/src/main/java/org/apache/helix/util/InstanceValidationUtil.java 
+++ b/helix-core/src/main/java/org/apache/helix/util/InstanceValidationUtil.java @@ -20,6 +20,7 @@ */ import java.util.ArrayList; +import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -254,6 +255,28 @@ public static boolean hasErrorPartitions(HelixDataAccessor dataAccessor, String public static Map> perPartitionHealthCheck(List externalViews, Map> globalPartitionHealthStatus, String instanceToBeStop, HelixDataAccessor dataAccessor) { + return perPartitionHealthCheck(externalViews, globalPartitionHealthStatus, instanceToBeStop, + dataAccessor, Collections.emptySet()); + } + + /** + * Get the problematic partitions on the to-be-stop instance + * Requirement: + * If the instance and the toBeStoppedInstances are stopped and the partitions on them are OFFLINE, + * the cluster still have enough "healthy" replicas on other sibling instances + * + * - sibling instances mean those who share the same partition (replicas) of the to-be-stop instance + * + * @param globalPartitionHealthStatus (instance => (partition name, health status)) + * @param instanceToBeStop The instance to be stopped + * @param dataAccessor The data accessor + * @param toBeStoppedInstances A set of instances presumed to be are already stopped. 
And it + * shouldn't contain the `instanceToBeStop` + * @return A list of problematic partitions if the instance is stopped + */ + public static Map> perPartitionHealthCheck(List externalViews, + Map> globalPartitionHealthStatus, String instanceToBeStop, + HelixDataAccessor dataAccessor, Set toBeStoppedInstances) { Map> unhealthyPartitions = new HashMap<>(); for (ExternalView externalView : externalViews) { @@ -273,7 +296,8 @@ public static Map> perPartitionHealthCheck(List toBeStoppedInstances) { PropertyKey.Builder propertyKeyBuilder = dataAccessor.keyBuilder(); List resources = dataAccessor.getChildNames(propertyKeyBuilder.idealStates()); @@ -406,8 +451,9 @@ public static boolean siblingNodesActiveReplicaCheck(HelixDataAccessor dataAcces if (stateByInstanceMap.containsKey(instanceName)) { int numHealthySiblings = 0; for (Map.Entry entry : stateByInstanceMap.entrySet()) { - if (!entry.getKey().equals(instanceName) - && !unhealthyStates.contains(entry.getValue())) { + if (!entry.getKey().equals(instanceName) && (toBeStoppedInstances == null + || !toBeStoppedInstances.contains(entry.getKey())) && !unhealthyStates.contains( + entry.getValue())) { numHealthySiblings++; } } diff --git a/helix-core/src/test/java/org/apache/helix/util/TestInstanceValidationUtil.java b/helix-core/src/test/java/org/apache/helix/util/TestInstanceValidationUtil.java index 2c51fc92b2..aa1ba32290 100644 --- a/helix-core/src/test/java/org/apache/helix/util/TestInstanceValidationUtil.java +++ b/helix-core/src/test/java/org/apache/helix/util/TestInstanceValidationUtil.java @@ -21,7 +21,9 @@ import java.util.Arrays; import java.util.Collections; +import java.util.HashSet; import java.util.List; +import java.util.Set; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; @@ -401,6 +403,81 @@ public void TestSiblingNodesActiveReplicaCheck_success() { Assert.assertTrue(result); } + @Test + public void 
TestSiblingNodesActiveReplicaCheckSuccessWithToBeStoppedInstances() { + String resource = "resource"; + Mock mock = new Mock(); + doReturn(ImmutableList.of(resource)).when(mock.dataAccessor) + .getChildNames(argThat(new PropertyKeyArgument(PropertyType.IDEALSTATES))); + // set ideal state + IdealState idealState = mock(IdealState.class); + when(idealState.isEnabled()).thenReturn(true); + when(idealState.isValid()).thenReturn(true); + when(idealState.getStateModelDefRef()).thenReturn("MasterSlave"); + doReturn(idealState).when(mock.dataAccessor).getProperty(argThat(new PropertyKeyArgument(PropertyType.IDEALSTATES))); + + // set external view + ExternalView externalView = mock(ExternalView.class); + when(externalView.getMinActiveReplicas()).thenReturn(2); + when(externalView.getStateModelDefRef()).thenReturn("MasterSlave"); + when(externalView.getPartitionSet()).thenReturn(ImmutableSet.of("db0")); + when(externalView.getStateMap("db0")).thenReturn(ImmutableMap.of(TEST_INSTANCE, "Master", + "instance1", "Slave", "instance2", "Slave", "instance3", "Slave")); + doReturn(externalView).when(mock.dataAccessor) + .getProperty(argThat(new PropertyKeyArgument(PropertyType.EXTERNALVIEW))); + StateModelDefinition stateModelDefinition = mock(StateModelDefinition.class); + when(stateModelDefinition.getInitialState()).thenReturn("OFFLINE"); + doReturn(stateModelDefinition).when(mock.dataAccessor) + .getProperty(argThat(new PropertyKeyArgument(PropertyType.STATEMODELDEFS))); + + Set toBeStoppedInstances = new HashSet<>(); + toBeStoppedInstances.add("instance3"); + toBeStoppedInstances.add("invalidInstances"); // include an invalid instance. 
+ boolean result = + InstanceValidationUtil.siblingNodesActiveReplicaCheck(mock.dataAccessor, TEST_INSTANCE, toBeStoppedInstances); + Assert.assertTrue(result); + + result = + InstanceValidationUtil.siblingNodesActiveReplicaCheck(mock.dataAccessor, TEST_INSTANCE, null); + Assert.assertTrue(result); + } + + @Test + public void TestSiblingNodesActiveReplicaCheckFailsWithToBeStoppedInstances() { + String resource = "resource"; + Mock mock = new Mock(); + doReturn(ImmutableList.of(resource)).when(mock.dataAccessor) + .getChildNames(argThat(new PropertyKeyArgument(PropertyType.IDEALSTATES))); + // set ideal state + IdealState idealState = mock(IdealState.class); + when(idealState.isEnabled()).thenReturn(true); + when(idealState.isValid()).thenReturn(true); + when(idealState.getStateModelDefRef()).thenReturn("MasterSlave"); + doReturn(idealState).when(mock.dataAccessor).getProperty(argThat(new PropertyKeyArgument(PropertyType.IDEALSTATES))); + + // set external view + ExternalView externalView = mock(ExternalView.class); + when(externalView.getMinActiveReplicas()).thenReturn(2); + when(externalView.getStateModelDefRef()).thenReturn("MasterSlave"); + when(externalView.getPartitionSet()).thenReturn(ImmutableSet.of("db0")); + when(externalView.getStateMap("db0")).thenReturn(ImmutableMap.of(TEST_INSTANCE, "Master", + "instance1", "Slave", "instance2", "Slave", "instance3", "Slave")); + doReturn(externalView).when(mock.dataAccessor) + .getProperty(argThat(new PropertyKeyArgument(PropertyType.EXTERNALVIEW))); + StateModelDefinition stateModelDefinition = mock(StateModelDefinition.class); + when(stateModelDefinition.getInitialState()).thenReturn("OFFLINE"); + doReturn(stateModelDefinition).when(mock.dataAccessor) + .getProperty(argThat(new PropertyKeyArgument(PropertyType.STATEMODELDEFS))); + + Set toBeStoppedInstances = new HashSet<>(); + toBeStoppedInstances.add("instance1"); + toBeStoppedInstances.add("instance2"); + boolean result = + 
InstanceValidationUtil.siblingNodesActiveReplicaCheck(mock.dataAccessor, TEST_INSTANCE, toBeStoppedInstances); + + Assert.assertFalse(result); + } + @Test public void TestSiblingNodesActiveReplicaCheck_fail() { String resource = "resource"; diff --git a/helix-rest/src/main/java/org/apache/helix/rest/clusterMaintenanceService/MaintenanceManagementService.java b/helix-rest/src/main/java/org/apache/helix/rest/clusterMaintenanceService/MaintenanceManagementService.java index 529fc469d4..c3fa04966f 100644 --- a/helix-rest/src/main/java/org/apache/helix/rest/clusterMaintenanceService/MaintenanceManagementService.java +++ b/helix-rest/src/main/java/org/apache/helix/rest/clusterMaintenanceService/MaintenanceManagementService.java @@ -339,17 +339,23 @@ private List getAllOperationClasses(List operations) */ public StoppableCheck getInstanceStoppableCheck(String clusterId, String instanceName, String jsonContent) throws IOException { - return batchGetInstancesStoppableChecks(clusterId, ImmutableList.of(instanceName), jsonContent) - .get(instanceName); + return batchGetInstancesStoppableChecks(clusterId, ImmutableList.of(instanceName), + jsonContent).get(instanceName); } - public Map batchGetInstancesStoppableChecks(String clusterId, List instances, String jsonContent) throws IOException { + return batchGetInstancesStoppableChecks(clusterId, instances, jsonContent, + Collections.emptySet()); + } + + public Map batchGetInstancesStoppableChecks(String clusterId, + List instances, String jsonContent, Set toBeStoppedInstances) throws IOException { Map finalStoppableChecks = new HashMap<>(); // helix instance check. List instancesForCustomInstanceLevelChecks = - batchHelixInstanceStoppableCheck(clusterId, instances, finalStoppableChecks); + batchHelixInstanceStoppableCheck(clusterId, instances, finalStoppableChecks, + toBeStoppedInstances); // custom check, includes partition check. 
batchCustomInstanceStoppableCheck(clusterId, instancesForCustomInstanceLevelChecks, finalStoppableChecks, getMapFromJsonPayload(jsonContent)); @@ -441,10 +447,11 @@ private MaintenanceManagementInstanceInfo takeFreeSingleInstanceHelper(String cl } private List batchHelixInstanceStoppableCheck(String clusterId, - Collection instances, Map finalStoppableChecks) { - Map> helixInstanceChecks = instances.stream().collect(Collectors - .toMap(Function.identity(), - instance -> POOL.submit(() -> performHelixOwnInstanceCheck(clusterId, instance)))); + Collection instances, Map finalStoppableChecks, + Set toBeStoppedInstances) { + Map> helixInstanceChecks = instances.stream().collect( + Collectors.toMap(Function.identity(), instance -> POOL.submit( + () -> performHelixOwnInstanceCheck(clusterId, instance, toBeStoppedInstances)))); // finalStoppableChecks contains instances that does not pass this health check return filterInstancesForNextCheck(helixInstanceChecks, finalStoppableChecks); } @@ -512,7 +519,8 @@ private Map batchInstanceHealthCheck( if (healthCheck.equals(HELIX_INSTANCE_STOPPABLE_CHECK)) { // this is helix own check instancesForNext = - batchHelixInstanceStoppableCheck(clusterId, instancesForNext, finalStoppableChecks); + batchHelixInstanceStoppableCheck(clusterId, instancesForNext, finalStoppableChecks, + Collections.emptySet()); } else if (healthCheck.equals(HELIX_CUSTOM_STOPPABLE_CHECK)) { // custom check, includes custom Instance check and partition check. 
instancesForNext = @@ -601,10 +609,12 @@ private boolean isNonBlockingCheck(StoppableCheck stoppableCheck) { return true; } - private StoppableCheck performHelixOwnInstanceCheck(String clusterId, String instanceName) { + private StoppableCheck performHelixOwnInstanceCheck(String clusterId, String instanceName, + Set toBeStoppedInstances) { LOG.info("Perform helix own custom health checks for {}/{}", clusterId, instanceName); Map helixStoppableCheck = - getInstanceHealthStatus(clusterId, instanceName, HealthCheck.STOPPABLE_CHECK_LIST); + getInstanceHealthStatus(clusterId, instanceName, HealthCheck.STOPPABLE_CHECK_LIST, + toBeStoppedInstances); return new StoppableCheck(helixStoppableCheck, StoppableCheck.Category.HELIX_OWN_CHECK); } @@ -698,6 +708,12 @@ public static boolean getBooleanFromJsonPayload(String jsonString) @VisibleForTesting protected Map getInstanceHealthStatus(String clusterId, String instanceName, List healthChecks) { + return getInstanceHealthStatus(clusterId, instanceName, healthChecks, Collections.emptySet()); + } + + @VisibleForTesting + protected Map getInstanceHealthStatus(String clusterId, String instanceName, + List healthChecks, Set toBeStoppedInstances) { Map healthStatus = new HashMap<>(); for (HealthCheck healthCheck : healthChecks) { switch (healthCheck) { @@ -745,7 +761,7 @@ protected Map getInstanceHealthStatus(String clusterId, String break; case MIN_ACTIVE_REPLICA_CHECK_FAILED: healthStatus.put(HealthCheck.MIN_ACTIVE_REPLICA_CHECK_FAILED.name(), - InstanceValidationUtil.siblingNodesActiveReplicaCheck(_dataAccessor, instanceName)); + InstanceValidationUtil.siblingNodesActiveReplicaCheck(_dataAccessor, instanceName, toBeStoppedInstances)); break; default: LOG.error("Unsupported health check: {}", healthCheck); diff --git a/helix-rest/src/main/java/org/apache/helix/rest/clusterMaintenanceService/StoppableInstancesSelector.java b/helix-rest/src/main/java/org/apache/helix/rest/clusterMaintenanceService/StoppableInstancesSelector.java 
index dafe1ab2d8..8cf8bc83cb 100644 --- a/helix-rest/src/main/java/org/apache/helix/rest/clusterMaintenanceService/StoppableInstancesSelector.java +++ b/helix-rest/src/main/java/org/apache/helix/rest/clusterMaintenanceService/StoppableInstancesSelector.java @@ -22,6 +22,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Collections; +import java.util.HashMap; import java.util.HashSet; import java.util.LinkedHashMap; import java.util.List; @@ -33,31 +34,27 @@ import com.fasterxml.jackson.databind.node.ArrayNode; import com.fasterxml.jackson.databind.node.JsonNodeFactory; import com.fasterxml.jackson.databind.node.ObjectNode; -import org.apache.commons.lang3.NotImplementedException; import org.apache.helix.rest.server.json.cluster.ClusterTopology; import org.apache.helix.rest.server.json.instance.StoppableCheck; +import org.apache.helix.rest.server.resources.helix.InstancesAccessor; public class StoppableInstancesSelector { // This type does not belong to real HealthCheck failed reason. Also, if we add this type // to HealthCheck enum, it could introduce more unnecessary check step since the InstanceServiceImpl // loops all the types to do corresponding checks. 
private final static String INSTANCE_NOT_EXIST = "HELIX:INSTANCE_NOT_EXIST"; - private String _clusterId; + private final String _clusterId; private List _orderOfZone; - private String _customizedInput; - private ArrayNode _stoppableInstances; - private ObjectNode _failedStoppableInstances; - private MaintenanceManagementService _maintenanceService; - private ClusterTopology _clusterTopology; + private final String _customizedInput; + private final MaintenanceManagementService _maintenanceService; + private final ClusterTopology _clusterTopology; public StoppableInstancesSelector(String clusterId, List orderOfZone, - String customizedInput, ArrayNode stoppableInstances, ObjectNode failedStoppableInstances, - MaintenanceManagementService maintenanceService, ClusterTopology clusterTopology) { + String customizedInput, MaintenanceManagementService maintenanceService, + ClusterTopology clusterTopology) { _clusterId = clusterId; _orderOfZone = orderOfZone; _customizedInput = customizedInput; - _stoppableInstances = stoppableInstances; - _failedStoppableInstances = failedStoppableInstances; _maintenanceService = maintenanceService; _clusterTopology = clusterTopology; } @@ -69,26 +66,92 @@ public StoppableInstancesSelector(String clusterId, List orderOfZone, * reasons for non-stoppability. * * @param instances A list of instance to be evaluated. + * @param toBeStoppedInstances A list of instances presumed to be are already stopped + * @return An ObjectNode containing: + * - 'stoppableNode': List of instances that can be stopped. + * - 'instance_not_stoppable_with_reasons': A map with the instance name as the key and + * a list of reasons for non-stoppability as the value. 
* @throws IOException */ - public void getStoppableInstancesInSingleZone(List instances) throws IOException { + public ObjectNode getStoppableInstancesInSingleZone(List instances, + List toBeStoppedInstances) throws IOException { + ObjectNode result = JsonNodeFactory.instance.objectNode(); + ArrayNode stoppableInstances = + result.putArray(InstancesAccessor.InstancesProperties.instance_stoppable_parallel.name()); + ObjectNode failedStoppableInstances = result.putObject( + InstancesAccessor.InstancesProperties.instance_not_stoppable_with_reasons.name()); + Set toBeStoppedInstancesSet = new HashSet<>(toBeStoppedInstances); + List zoneBasedInstance = getZoneBasedInstances(instances, _clusterTopology.toZoneMapping()); + populateStoppableInstances(zoneBasedInstance, toBeStoppedInstancesSet, stoppableInstances, + failedStoppableInstances); + processNonexistentInstances(instances, failedStoppableInstances); + + return result; + } + + /** + * Evaluates and collects stoppable instances cross a set of zones based on the order of zones. + * The method iterates through instances, performing stoppable checks, and records reasons for + * non-stoppability. + * + * @param instances A list of instance to be evaluated. + * @param toBeStoppedInstances A list of instances presumed to be are already stopped + * @return An ObjectNode containing: + * - 'stoppableNode': List of instances that can be stopped. + * - 'instance_not_stoppable_with_reasons': A map with the instance name as the key and + * a list of reasons for non-stoppability as the value. 
+ * @throws IOException + */ + public ObjectNode getStoppableInstancesCrossZones(List instances, + List toBeStoppedInstances) throws IOException { + ObjectNode result = JsonNodeFactory.instance.objectNode(); + ArrayNode stoppableInstances = + result.putArray(InstancesAccessor.InstancesProperties.instance_stoppable_parallel.name()); + ObjectNode failedStoppableInstances = result.putObject( + InstancesAccessor.InstancesProperties.instance_not_stoppable_with_reasons.name()); + Set toBeStoppedInstancesSet = new HashSet<>(toBeStoppedInstances); + + Map> zoneMapping = _clusterTopology.toZoneMapping(); + for (String zone : _orderOfZone) { + Set instanceSet = new HashSet<>(instances); + Set currentZoneInstanceSet = new HashSet<>(zoneMapping.get(zone)); + instanceSet.retainAll(currentZoneInstanceSet); + if (instanceSet.isEmpty()) { + continue; + } + populateStoppableInstances(new ArrayList<>(instanceSet), toBeStoppedInstancesSet, stoppableInstances, + failedStoppableInstances); + } + processNonexistentInstances(instances, failedStoppableInstances); + return result; + } + + private void populateStoppableInstances(List instances, Set toBeStoppedInstances, + ArrayNode stoppableInstances, ObjectNode failedStoppableInstances) throws IOException { Map instancesStoppableChecks = - _maintenanceService.batchGetInstancesStoppableChecks(_clusterId, zoneBasedInstance, - _customizedInput); + _maintenanceService.batchGetInstancesStoppableChecks(_clusterId, instances, + _customizedInput, toBeStoppedInstances); + for (Map.Entry instanceStoppableCheck : instancesStoppableChecks.entrySet()) { String instance = instanceStoppableCheck.getKey(); StoppableCheck stoppableCheck = instanceStoppableCheck.getValue(); if (!stoppableCheck.isStoppable()) { - ArrayNode failedReasonsNode = _failedStoppableInstances.putArray(instance); + ArrayNode failedReasonsNode = failedStoppableInstances.putArray(instance); for (String failedReason : stoppableCheck.getFailedChecks()) { 
failedReasonsNode.add(JsonNodeFactory.instance.textNode(failedReason)); } } else { - _stoppableInstances.add(instance); + stoppableInstances.add(instance); + // Update the toBeStoppedInstances set with the currently identified stoppable instance. + // This ensures that subsequent checks in other zones are aware of this instance's stoppable status. + toBeStoppedInstances.add(instance); } } + } + + private void processNonexistentInstances(List instances, ObjectNode failedStoppableInstances) { // Adding following logic to check whether instances exist or not. An instance exist could be // checking following scenario: // 1. Instance got dropped. (InstanceConfig is gone.) @@ -100,28 +163,36 @@ public void getStoppableInstancesInSingleZone(List instances) throws IOE Set nonSelectedInstances = new HashSet<>(instances); nonSelectedInstances.removeAll(_clusterTopology.getAllInstances()); for (String nonSelectedInstance : nonSelectedInstances) { - ArrayNode failedReasonsNode = _failedStoppableInstances.putArray(nonSelectedInstance); + ArrayNode failedReasonsNode = failedStoppableInstances.putArray(nonSelectedInstance); failedReasonsNode.add(JsonNodeFactory.instance.textNode(INSTANCE_NOT_EXIST)); } } - public void getStoppableInstancesCrossZones() { - // TODO: Add implementation to enable cross zone stoppable check. - throw new NotImplementedException("Not Implemented"); - } - /** * Determines the order of zones. If an order is provided by the user, it will be used directly. * Otherwise, zones will be ordered by their associated instance count in descending order. * * If `random` is true, the order of zones will be randomized regardless of any previous order. * + * @param instances A list of instance to be used to calculate the order of zones. * @param random Indicates whether to randomize the order of zones. 
*/ - public void calculateOrderOfZone(boolean random) { + public void calculateOrderOfZone(List instances, boolean random) { if (_orderOfZone == null) { - _orderOfZone = - new ArrayList<>(getOrderedZoneToInstancesMap(_clusterTopology.toZoneMapping()).keySet()); + Map> zoneMapping = _clusterTopology.toZoneMapping(); + Map> zoneToInstancesMap = new HashMap<>(); + for (ClusterTopology.Zone zone : _clusterTopology.getZones()) { + Set instanceSet = new HashSet<>(instances); + // TODO: Use instance config from Helix-rest Cache to get the zone instead of reading the topology info + Set currentZoneInstanceSet = new HashSet<>(zoneMapping.get(zone.getId())); + instanceSet.retainAll(currentZoneInstanceSet); + if (instanceSet.isEmpty()) { + continue; + } + zoneToInstancesMap.put(zone.getId(), instanceSet); + } + + _orderOfZone = new ArrayList<>(getOrderedZoneToInstancesMap(zoneToInstancesMap).keySet()); } if (_orderOfZone.isEmpty()) { @@ -182,8 +253,6 @@ public static class StoppableInstancesSelectorBuilder { private String _clusterId; private List _orderOfZone; private String _customizedInput; - private ArrayNode _stoppableInstances; - private ObjectNode _failedStoppableInstances; private MaintenanceManagementService _maintenanceService; private ClusterTopology _clusterTopology; @@ -202,16 +271,6 @@ public StoppableInstancesSelectorBuilder setCustomizedInput(String customizedInp return this; } - public StoppableInstancesSelectorBuilder setStoppableInstances(ArrayNode stoppableInstances) { - _stoppableInstances = stoppableInstances; - return this; - } - - public StoppableInstancesSelectorBuilder setFailedStoppableInstances(ObjectNode failedStoppableInstances) { - _failedStoppableInstances = failedStoppableInstances; - return this; - } - public StoppableInstancesSelectorBuilder setMaintenanceService( MaintenanceManagementService maintenanceService) { _maintenanceService = maintenanceService; @@ -224,9 +283,8 @@ public StoppableInstancesSelectorBuilder 
setClusterTopology(ClusterTopology clus } public StoppableInstancesSelector build() { - return new StoppableInstancesSelector(_clusterId, _orderOfZone, - _customizedInput, _stoppableInstances, _failedStoppableInstances, _maintenanceService, - _clusterTopology); + return new StoppableInstancesSelector(_clusterId, _orderOfZone, _customizedInput, + _maintenanceService, _clusterTopology); } } } diff --git a/helix-rest/src/main/java/org/apache/helix/rest/server/resources/helix/InstancesAccessor.java b/helix-rest/src/main/java/org/apache/helix/rest/server/resources/helix/InstancesAccessor.java index 8a21202704..785195ebe1 100644 --- a/helix-rest/src/main/java/org/apache/helix/rest/server/resources/helix/InstancesAccessor.java +++ b/helix-rest/src/main/java/org/apache/helix/rest/server/resources/helix/InstancesAccessor.java @@ -20,7 +20,9 @@ */ import java.io.IOException; +import java.util.ArrayList; import java.util.Collections; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; @@ -67,6 +69,7 @@ public enum InstancesProperties { disabled, selection_base, zone_order, + to_be_stopped_instances, customized_values, instance_stoppable_parallel, instance_not_stoppable_with_reasons @@ -224,6 +227,7 @@ private Response batchGetStoppableInstances(String clusterId, JsonNode node, boo List orderOfZone = null; String customizedInput = null; + List toBeStoppedInstances = Collections.emptyList(); if (node.get(InstancesAccessor.InstancesProperties.customized_values.name()) != null) { customizedInput = node.get(InstancesAccessor.InstancesProperties.customized_values.name()).toString(); @@ -235,18 +239,26 @@ private Response batchGetStoppableInstances(String clusterId, JsonNode node, boo OBJECT_MAPPER.getTypeFactory().constructCollectionType(List.class, String.class)); if (!orderOfZone.isEmpty() && random) { String message = - "Both 'orderOfZone' and 'random' parameters are set. 
Please specify only one option."; + "Both 'zone_order' and 'random' parameters are set. Please specify only one option."; _logger.error(message); return badRequest(message); } } - // Prepare output result - ObjectNode result = JsonNodeFactory.instance.objectNode(); - ArrayNode stoppableInstances = - result.putArray(InstancesAccessor.InstancesProperties.instance_stoppable_parallel.name()); - ObjectNode failedStoppableInstances = result.putObject( - InstancesAccessor.InstancesProperties.instance_not_stoppable_with_reasons.name()); + if (node.get(InstancesAccessor.InstancesProperties.to_be_stopped_instances.name()) != null) { + toBeStoppedInstances = OBJECT_MAPPER.readValue( + node.get(InstancesProperties.to_be_stopped_instances.name()).toString(), + OBJECT_MAPPER.getTypeFactory().constructCollectionType(List.class, String.class)); + Set instanceSet = new HashSet<>(instances); + instanceSet.retainAll(toBeStoppedInstances); + if (!instanceSet.isEmpty()) { + String message = + "'to_be_stopped_instances' and 'instances' have intersection: " + instanceSet + + ". 
Please make them mutually exclusive."; + _logger.error(message); + return badRequest(message); + } + } MaintenanceManagementService maintenanceService = new MaintenanceManagementService((ZKHelixDataAccessor) getDataAccssor(clusterId), @@ -260,18 +272,17 @@ private Response batchGetStoppableInstances(String clusterId, JsonNode node, boo .setClusterId(clusterId) .setOrderOfZone(orderOfZone) .setCustomizedInput(customizedInput) - .setStoppableInstances(stoppableInstances) - .setFailedStoppableInstances(failedStoppableInstances) .setMaintenanceService(maintenanceService) .setClusterTopology(clusterTopology) .build(); - stoppableInstancesSelector.calculateOrderOfZone(random); + stoppableInstancesSelector.calculateOrderOfZone(instances, random); + ObjectNode result; switch (selectionBase) { case zone_based: - stoppableInstancesSelector.getStoppableInstancesInSingleZone(instances); + result = stoppableInstancesSelector.getStoppableInstancesInSingleZone(instances, toBeStoppedInstances); break; case cross_zone_based: - stoppableInstancesSelector.getStoppableInstancesCrossZones(); + result = stoppableInstancesSelector.getStoppableInstancesCrossZones(instances, toBeStoppedInstances); break; case instance_based: default: diff --git a/helix-rest/src/test/java/org/apache/helix/rest/clusterMaintenanceService/TestMaintenanceManagementService.java b/helix-rest/src/test/java/org/apache/helix/rest/clusterMaintenanceService/TestMaintenanceManagementService.java index f8408b0707..a49a95066f 100644 --- a/helix-rest/src/test/java/org/apache/helix/rest/clusterMaintenanceService/TestMaintenanceManagementService.java +++ b/helix-rest/src/test/java/org/apache/helix/rest/clusterMaintenanceService/TestMaintenanceManagementService.java @@ -114,7 +114,7 @@ public MockMaintenanceManagementService(ZKHelixDataAccessor dataAccessor, @Override protected Map getInstanceHealthStatus(String clusterId, String instanceName, - List healthChecks) { + List healthChecks, Set toBeStoppedInstances) { return 
Collections.emptyMap(); } } @@ -127,7 +127,7 @@ public void testGetInstanceStoppableCheckWhenHelixOwnCheckFail() throws IOExcept _customRestClient, false, false, HelixRestNamespace.DEFAULT_NAMESPACE_NAME) { @Override protected Map getInstanceHealthStatus(String clusterId, - String instanceName, List healthChecks) { + String instanceName, List healthChecks, Set toBeStoppedInstances) { return failedCheck; } }; @@ -147,7 +147,7 @@ public void testGetInstanceStoppableCheckWhenCustomInstanceCheckFail() throws IO _customRestClient, false, false, HelixRestNamespace.DEFAULT_NAMESPACE_NAME) { @Override protected Map getInstanceHealthStatus(String clusterId, - String instanceName, List healthChecks) { + String instanceName, List healthChecks, Set toBeStoppedInstances) { return Collections.emptyMap(); } }; @@ -227,7 +227,7 @@ public void testGetInstanceStoppableCheckWhenCustomInstanceCheckDisabled() throw _customRestClient, false, false, new HashSet<>(Arrays.asList(StoppableCheck.Category.CUSTOM_INSTANCE_CHECK)), HelixRestNamespace.DEFAULT_NAMESPACE_NAME); - + StoppableCheck actual = service.getInstanceStoppableCheck(TEST_CLUSTER, TEST_INSTANCE, ""); List expectedFailedChecks = Arrays.asList( StoppableCheck.Category.CUSTOM_PARTITION_CHECK.getPrefix() @@ -246,7 +246,7 @@ public void testGetInstanceStoppableCheckConnectionRefused() throws IOException _customRestClient, false, false, HelixRestNamespace.DEFAULT_NAMESPACE_NAME) { @Override protected Map getInstanceHealthStatus(String clusterId, - String instanceName, List healthChecks) { + String instanceName, List healthChecks, Set toBeStoppedInstances) { return Collections.emptyMap(); } }; @@ -365,7 +365,7 @@ public void testGetStoppableWithAllChecks() throws IOException { HelixRestNamespace.DEFAULT_NAMESPACE_NAME) { @Override protected Map getInstanceHealthStatus(String clusterId, - String instanceName, List healthChecks) { + String instanceName, List healthChecks, Set toBeStoppedInstances) { return instanceHealthFailedCheck; } 
}; @@ -393,7 +393,7 @@ public void testGetInstanceStoppableCheckWhenPartitionsCheckFail() throws IOExce _customRestClient, false, false, HelixRestNamespace.DEFAULT_NAMESPACE_NAME) { @Override protected Map getInstanceHealthStatus(String clusterId, - String instanceName, List healthChecks) { + String instanceName, List healthChecks, Set toBeStoppedInstances) { return Collections.emptyMap(); } }; diff --git a/helix-rest/src/test/java/org/apache/helix/rest/server/AbstractTestClass.java b/helix-rest/src/test/java/org/apache/helix/rest/server/AbstractTestClass.java index a7cf91c7b8..68561ce839 100644 --- a/helix-rest/src/test/java/org/apache/helix/rest/server/AbstractTestClass.java +++ b/helix-rest/src/test/java/org/apache/helix/rest/server/AbstractTestClass.java @@ -54,6 +54,7 @@ import org.apache.helix.model.ClusterConfig; import org.apache.helix.model.IdealState; import org.apache.helix.model.InstanceConfig; +import org.apache.helix.model.RESTConfig; import org.apache.helix.participant.StateMachineEngine; import org.apache.helix.rest.common.ContextPropertyKeys; import org.apache.helix.rest.common.HelixRestNamespace; @@ -129,9 +130,14 @@ public class AbstractTestClass extends JerseyTestNg.ContainerPerClassTest { protected static HelixZkClient _gZkClientTestNS; protected static BaseDataAccessor _baseAccessorTestNS; protected static final String STOPPABLE_CLUSTER = "StoppableTestCluster"; + protected static final String STOPPABLE_CLUSTER2 = "StoppableTestCluster2"; protected static final String TASK_TEST_CLUSTER = "TaskTestCluster"; protected static final List STOPPABLE_INSTANCES = Arrays.asList("instance0", "instance1", "instance2", "instance3", "instance4", "instance5"); + protected static final List STOPPABLE_INSTANCES2 = + Arrays.asList("instance0", "instance1", "instance2", "instance3", "instance4", "instance5", + "instance6", "instance7", "instance8", "instance9", "instance10", "instance11", + "instance12", "instance13", "instance14"); protected static Set 
_clusters; protected static String _superCluster = "superCluster"; @@ -329,13 +335,14 @@ protected void setupHelixResources() throws Exception { _configAccessor.setClusterConfig(cluster, clusterConfig); createResourceConfigs(cluster, 8); _workflowMap.put(cluster, createWorkflows(cluster, 3)); - Set resources = createResources(cluster, 8); + Set resources = createResources(cluster, 8, MIN_ACTIVE_REPLICA, NUM_REPLICA); _instancesMap.put(cluster, instances); _liveInstancesMap.put(cluster, liveInstances); _resourcesMap.put(cluster, resources); _clusterControllerManagers.add(startController(cluster)); } preSetupForParallelInstancesStoppableTest(STOPPABLE_CLUSTER, STOPPABLE_INSTANCES); + preSetupForCrosszoneParallelInstancesStoppableTest(STOPPABLE_CLUSTER2, STOPPABLE_INSTANCES2); } protected Set createInstances(String cluster, int numInstances) { @@ -348,16 +355,17 @@ protected Set createInstances(String cluster, int numInstances) { return instances; } - protected Set createResources(String cluster, int numResources) { + protected Set createResources(String cluster, int numResources, int minActiveReplica, + int replicationFactor) { Set resources = new HashSet<>(); for (int i = 0; i < numResources; i++) { String resource = cluster + "_db_" + i; _gSetupTool.addResourceToCluster(cluster, resource, NUM_PARTITIONS, "MasterSlave"); IdealState idealState = _gSetupTool.getClusterManagementTool().getResourceIdealState(cluster, resource); - idealState.setMinActiveReplicas(MIN_ACTIVE_REPLICA); + idealState.setMinActiveReplicas(minActiveReplica); _gSetupTool.getClusterManagementTool().setResourceIdealState(cluster, resource, idealState); - _gSetupTool.rebalanceStorageCluster(cluster, resource, NUM_REPLICA); + _gSetupTool.rebalanceStorageCluster(cluster, resource, replicationFactor); resources.add(resource); } return resources; @@ -575,7 +583,7 @@ private void preSetupForParallelInstancesStoppableTest(String clusterName, // Start participant startInstances(clusterName, new 
TreeSet<>(instances), 3); - createResources(clusterName, 1); + createResources(clusterName, 1, MIN_ACTIVE_REPLICA, NUM_REPLICA); _clusterControllerManagers.add(startController(clusterName)); // Make sure that cluster config exists @@ -606,6 +614,65 @@ private void preSetupForParallelInstancesStoppableTest(String clusterName, _workflowMap.put(STOPPABLE_CLUSTER, createWorkflows(STOPPABLE_CLUSTER, 3)); } + private void preSetupForCrosszoneParallelInstancesStoppableTest(String clusterName, + List instances) throws Exception { + _gSetupTool.addCluster(clusterName, true); + ClusterConfig clusterConfig = _configAccessor.getClusterConfig(clusterName); + clusterConfig.setFaultZoneType("helixZoneId"); + clusterConfig.setPersistIntermediateAssignment(true); + _configAccessor.setClusterConfig(clusterName, clusterConfig); + RESTConfig emptyRestConfig = new RESTConfig(clusterName); + _configAccessor.setRESTConfig(clusterName, emptyRestConfig); + // Create instance configs + List instanceConfigs = new ArrayList<>(); + int perZoneInstancesCount = 3; + int curZoneCount = 0, zoneId = 1; + for (int i = 0; i < instances.size(); i++) { + InstanceConfig instanceConfig = new InstanceConfig(instances.get(i)); + instanceConfig.setDomain("helixZoneId=zone" + zoneId + ",host=instance" + i); + if (++curZoneCount >= perZoneInstancesCount) { + curZoneCount = 0; + zoneId++; + } + instanceConfigs.add(instanceConfig); + } + + for (InstanceConfig instanceConfig : instanceConfigs) { + _gSetupTool.getClusterManagementTool().addInstance(clusterName, instanceConfig); + } + + // Start participant + startInstances(clusterName, new TreeSet<>(instances), instances.size()); + createResources(clusterName, 1, 2, 3); + _clusterControllerManagers.add(startController(clusterName)); + + // Make sure that cluster config exists + boolean isClusterConfigExist = TestHelper.verify(() -> { + ClusterConfig stoppableClusterConfig; + try { + stoppableClusterConfig = _configAccessor.getClusterConfig(clusterName); + } catch 
(Exception e) { + return false; + } + return (stoppableClusterConfig != null); + }, TestHelper.WAIT_DURATION); + Assert.assertTrue(isClusterConfigExist); + // Make sure that instance config exists for the instance0 to instance5 + for (String instance: instances) { + boolean isinstanceConfigExist = TestHelper.verify(() -> { + InstanceConfig instanceConfig; + try { + instanceConfig = _configAccessor.getInstanceConfig(clusterName, instance); + } catch (Exception e) { + return false; + } + return (instanceConfig != null); + }, TestHelper.WAIT_DURATION); + Assert.assertTrue(isinstanceConfigExist); + } + _clusters.add(clusterName); + _workflowMap.put(clusterName, createWorkflows(clusterName, 3)); + } /** * Starts a HelixRestServer for the test suite. * @return diff --git a/helix-rest/src/test/java/org/apache/helix/rest/server/TestInstancesAccessor.java b/helix-rest/src/test/java/org/apache/helix/rest/server/TestInstancesAccessor.java index 01701a4864..2bc539a4d4 100644 --- a/helix-rest/src/test/java/org/apache/helix/rest/server/TestInstancesAccessor.java +++ b/helix-rest/src/test/java/org/apache/helix/rest/server/TestInstancesAccessor.java @@ -40,12 +40,167 @@ import org.apache.helix.rest.server.util.JerseyUriRequestBuilder; import org.apache.helix.tools.ClusterVerifiers.BestPossibleExternalViewVerifier; import org.testng.Assert; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; public class TestInstancesAccessor extends AbstractTestClass { private final static String CLUSTER_NAME = "TestCluster_0"; + @DataProvider + public Object[][] generatePayloadCrossZoneStoppableCheckWithZoneOrder() { + return new Object[][]{ + {String.format( + "{\"%s\":\"%s\",\"%s\":[\"%s\",\"%s\",\"%s\",\"%s\", \"%s\", \"%s\", \"%s\"," + + " \"%s\", \"%s\", \"%s\", \"%s\", \"%s\", \"%s\", \"%s\", \"%s\"]," + + "\"%s\":[\"%s\", \"%s\", \"%s\", \"%s\", \"%s\"], \"%s\":[\"%s\"]}", + InstancesAccessor.InstancesProperties.selection_base.name(), + 
InstancesAccessor.InstanceHealthSelectionBase.cross_zone_based.name(), + InstancesAccessor.InstancesProperties.instances.name(), "instance1", "instance2", + "instance3", "instance4", "instance5", "instance6", "instance7", "instance8", + "instance9", "instance10", "instance11", "instance12", "instance13", "instance14", + "invalidInstance", + InstancesAccessor.InstancesProperties.zone_order.name(),"zone5", "zone4", "zone3", "zone2", + "zone1", + InstancesAccessor.InstancesProperties.to_be_stopped_instances.name(), + "instance0"), + }, + {String.format( + "{\"%s\":\"%s\",\"%s\":[\"%s\",\"%s\",\"%s\",\"%s\", \"%s\", \"%s\", \"%s\",\"%s\", \"%s\", \"%s\"]," + + "\"%s\":[\"%s\", \"%s\", \"%s\", \"%s\", \"%s\"], \"%s\":[\"%s\", \"%s\", \"%s\"]}", + InstancesAccessor.InstancesProperties.selection_base.name(), + InstancesAccessor.InstanceHealthSelectionBase.cross_zone_based.name(), + InstancesAccessor.InstancesProperties.instances.name(), "instance1", "instance3", + "instance6", "instance9", "instance10", "instance11", "instance12", "instance13", + "instance14", "invalidInstance", + InstancesAccessor.InstancesProperties.zone_order.name(), "zone5", "zone4", "zone1", + "zone3", "zone2", InstancesAccessor.InstancesProperties.to_be_stopped_instances.name(), + "instance0", "invalidInstance1", "invalidInstance1"), + } + }; + } + + @Test + public void testInstanceStoppableZoneBasedWithToBeStoppedInstances() throws IOException { + System.out.println("Start test :" + TestHelper.getTestMethodName()); + + String content = String.format( + "{\"%s\":\"%s\",\"%s\":[\"%s\",\"%s\",\"%s\",\"%s\",\"%s\", \"%s\"], \"%s\":[\"%s\",\"%s\"], \"%s\":[\"%s\", \"%s\", \"%s\"]}", + InstancesAccessor.InstancesProperties.selection_base.name(), + InstancesAccessor.InstanceHealthSelectionBase.zone_based.name(), + InstancesAccessor.InstancesProperties.instances.name(), "instance1", + "instance2", "instance3", "instance4", "instance5", "invalidInstance", + 
InstancesAccessor.InstancesProperties.zone_order.name(), "zone2", "zone1", + InstancesAccessor.InstancesProperties.to_be_stopped_instances.name(), "instance0", "instance6", "invalidInstance1"); + + Response response = new JerseyUriRequestBuilder( + "clusters/{}/instances?command=stoppable&skipHealthCheckCategories=CUSTOM_INSTANCE_CHECK,CUSTOM_PARTITION_CHECK").format( + STOPPABLE_CLUSTER2).post(this, Entity.entity(content, MediaType.APPLICATION_JSON_TYPE)); + JsonNode jsonNode = OBJECT_MAPPER.readTree(response.readEntity(String.class)); + + Set stoppableSet = getStringSet(jsonNode, + InstancesAccessor.InstancesProperties.instance_stoppable_parallel.name()); + Assert.assertTrue(stoppableSet.contains("instance4") && stoppableSet.contains("instance3")); + + JsonNode nonStoppableInstances = jsonNode.get( + InstancesAccessor.InstancesProperties.instance_not_stoppable_with_reasons.name()); + // "StoppableTestCluster2_db_0_3" : { "instance0" : "MASTER", "instance13" : "SLAVE", "instance5" : "SLAVE"}. + // Since instance0 is to_be_stopped and MIN_ACTIVE_REPLICA is 2, instance5 is not stoppable. 
+ Assert.assertEquals(getStringSet(nonStoppableInstances, "instance5"), + ImmutableSet.of("HELIX:MIN_ACTIVE_REPLICA_CHECK_FAILED")); + Assert.assertEquals(getStringSet(nonStoppableInstances, "invalidInstance"), + ImmutableSet.of("HELIX:INSTANCE_NOT_EXIST")); + + System.out.println("End test :" + TestHelper.getTestMethodName()); + } + + @Test + public void testInstanceStoppableZoneBasedWithoutZoneOrder() throws IOException { + System.out.println("Start test :" + TestHelper.getTestMethodName()); + String content = String.format( + "{\"%s\":\"%s\",\"%s\":[\"%s\",\"%s\",\"%s\",\"%s\",\"%s\",\"%s\"], \"%s\":[\"%s\", \"%s\", \"%s\"]}", + InstancesAccessor.InstancesProperties.selection_base.name(), + InstancesAccessor.InstanceHealthSelectionBase.zone_based.name(), + InstancesAccessor.InstancesProperties.instances.name(), "instance0", "instance1", + "instance2", "instance3", "instance4", "invalidInstance", + InstancesAccessor.InstancesProperties.to_be_stopped_instances.name(), + "instance7", "instance9", "instance10"); + + Response response = new JerseyUriRequestBuilder( + "clusters/{}/instances?command=stoppable&skipHealthCheckCategories=CUSTOM_INSTANCE_CHECK,CUSTOM_PARTITION_CHECK").format( + STOPPABLE_CLUSTER2).post(this, Entity.entity(content, MediaType.APPLICATION_JSON_TYPE)); + JsonNode jsonNode = OBJECT_MAPPER.readTree(response.readEntity(String.class)); + + // Without zone order, helix should pick the zone1 because it has higher instance count than zone2. 
+ Set stoppableSet = getStringSet(jsonNode, + InstancesAccessor.InstancesProperties.instance_stoppable_parallel.name()); + Assert.assertTrue(stoppableSet.contains("instance0") && stoppableSet.contains("instance1")); + + JsonNode nonStoppableInstances = jsonNode.get( + InstancesAccessor.InstancesProperties.instance_not_stoppable_with_reasons.name()); + Assert.assertEquals(getStringSet(nonStoppableInstances, "instance2"), + ImmutableSet.of("HELIX:MIN_ACTIVE_REPLICA_CHECK_FAILED")); + Assert.assertEquals(getStringSet(nonStoppableInstances, "invalidInstance"), + ImmutableSet.of("HELIX:INSTANCE_NOT_EXIST")); + System.out.println("End test :" + TestHelper.getTestMethodName()); + } + + @Test(dataProvider = "generatePayloadCrossZoneStoppableCheckWithZoneOrder") + public void testCrossZoneStoppableWithZoneOrder(String content) throws IOException { + System.out.println("Start test :" + TestHelper.getTestMethodName()); + Response response = new JerseyUriRequestBuilder( + "clusters/{}/instances?command=stoppable&skipHealthCheckCategories=CUSTOM_INSTANCE_CHECK,CUSTOM_PARTITION_CHECK").format( + STOPPABLE_CLUSTER2).post(this, Entity.entity(content, MediaType.APPLICATION_JSON_TYPE)); + JsonNode jsonNode = OBJECT_MAPPER.readTree(response.readEntity(String.class)); + + Set stoppableSet = getStringSet(jsonNode, + InstancesAccessor.InstancesProperties.instance_stoppable_parallel.name()); + Assert.assertTrue(stoppableSet.contains("instance14") && stoppableSet.contains("instance12") + && stoppableSet.contains("instance11") && stoppableSet.contains("instance10")); + + JsonNode nonStoppableInstances = jsonNode.get( + InstancesAccessor.InstancesProperties.instance_not_stoppable_with_reasons.name()); + Assert.assertEquals(getStringSet(nonStoppableInstances, "instance13"), + ImmutableSet.of("HELIX:MIN_ACTIVE_REPLICA_CHECK_FAILED")); + Assert.assertEquals(getStringSet(nonStoppableInstances, "invalidInstance"), + ImmutableSet.of("HELIX:INSTANCE_NOT_EXIST")); + System.out.println("End test :" 
+ TestHelper.getTestMethodName()); + } + @Test + public void testCrossZoneStoppableWithoutZoneOrder() throws IOException { + System.out.println("Start test :" + TestHelper.getTestMethodName()); + String content = String.format( + "{\"%s\":\"%s\",\"%s\":[\"%s\",\"%s\",\"%s\",\"%s\", \"%s\", \"%s\", \"%s\",\"%s\", \"%s\", \"%s\"]," + + "\"%s\":[\"%s\", \"%s\", \"%s\"]}", + InstancesAccessor.InstancesProperties.selection_base.name(), + InstancesAccessor.InstanceHealthSelectionBase.cross_zone_based.name(), + InstancesAccessor.InstancesProperties.instances.name(), "instance1", "instance3", + "instance6", "instance9", "instance10", "instance11", "instance12", "instance13", + "instance14", "invalidInstance", + InstancesAccessor.InstancesProperties.to_be_stopped_instances.name(), "instance0", + "invalidInstance1", "invalidInstance1"); + + Response response = new JerseyUriRequestBuilder( + "clusters/{}/instances?command=stoppable&skipHealthCheckCategories=CUSTOM_INSTANCE_CHECK,CUSTOM_PARTITION_CHECK").format( + STOPPABLE_CLUSTER2).post(this, Entity.entity(content, MediaType.APPLICATION_JSON_TYPE)); + JsonNode jsonNode = OBJECT_MAPPER.readTree(response.readEntity(String.class)); + + Set stoppableSet = getStringSet(jsonNode, + InstancesAccessor.InstancesProperties.instance_stoppable_parallel.name()); + Assert.assertTrue(stoppableSet.contains("instance14") && stoppableSet.contains("instance12") + && stoppableSet.contains("instance11") && stoppableSet.contains("instance10")); + + JsonNode nonStoppableInstances = jsonNode.get( + InstancesAccessor.InstancesProperties.instance_not_stoppable_with_reasons.name()); + Assert.assertEquals(getStringSet(nonStoppableInstances, "instance13"), + ImmutableSet.of("HELIX:MIN_ACTIVE_REPLICA_CHECK_FAILED")); + Assert.assertEquals(getStringSet(nonStoppableInstances, "invalidInstance"), + ImmutableSet.of("HELIX:INSTANCE_NOT_EXIST")); + System.out.println("End test :" + TestHelper.getTestMethodName()); + } + + + @Test(dependsOnMethods = 
"testInstanceStoppableZoneBasedWithToBeStoppedInstances") public void testInstanceStoppable_zoneBased_zoneOrder() throws IOException { System.out.println("Start test :" + TestHelper.getTestMethodName()); // Select instances with zone based diff --git a/helix-rest/src/test/java/org/apache/helix/rest/server/util/TestInstanceValidationUtilInRest.java b/helix-rest/src/test/java/org/apache/helix/rest/server/util/TestInstanceValidationUtilInRest.java index 35e6399e0f..e37da34ff6 100644 --- a/helix-rest/src/test/java/org/apache/helix/rest/server/util/TestInstanceValidationUtilInRest.java +++ b/helix-rest/src/test/java/org/apache/helix/rest/server/util/TestInstanceValidationUtilInRest.java @@ -22,8 +22,10 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Set; import org.apache.helix.HelixDataAccessor; import org.apache.helix.PropertyKey; @@ -103,6 +105,52 @@ public void testPartitionLevelCheckInitState() { Assert.assertEquals(failedPartitions.keySet().size(), 2); } + @Test + public void testPartitionLevelCheckWithToBeStoppedNode() { + List externalViews = new ArrayList<>(Arrays.asList(prepareExternalViewOnline())); + Mock mock = new Mock(); + HelixDataAccessor accessor = mock.dataAccessor; + + when(mock.dataAccessor.keyBuilder()) + .thenReturn(new PropertyKey.Builder(TEST_CLUSTER)); + when(mock.dataAccessor + .getProperty(new PropertyKey.Builder(TEST_CLUSTER).stateModelDef(MasterSlaveSMD.name))) + .thenReturn(mock.stateModel); + when(mock.stateModel.getTopState()).thenReturn("MASTER"); + when(mock.stateModel.getInitialState()).thenReturn("OFFLINE"); + + Map> partitionStateMap = new HashMap<>(); + partitionStateMap.put("h1", new HashMap<>()); + partitionStateMap.put("h2", new HashMap<>()); + partitionStateMap.put("h3", new HashMap<>()); + partitionStateMap.put("h4", new HashMap<>()); + + partitionStateMap.get("h1").put("p1", true); + 
partitionStateMap.get("h2").put("p1", true); + partitionStateMap.get("h3").put("p1", true); + partitionStateMap.get("h4").put("p1", true); + + partitionStateMap.get("h1").put("p2", true); + partitionStateMap.get("h2").put("p2", false); + partitionStateMap.get("h3").put("p2", true); + + Set toBeStoppedInstances = new HashSet<>(); + toBeStoppedInstances.add("h3"); + Map> failedPartitions = InstanceValidationUtil.perPartitionHealthCheck( + externalViews, partitionStateMap, "h1", accessor, toBeStoppedInstances); + Assert.assertEquals(failedPartitions.keySet().size(), 1); + Assert.assertEquals(failedPartitions.get("p2").size(), 1); + Assert.assertTrue(failedPartitions.get("p2").contains("UNHEALTHY_PARTITION")); + + toBeStoppedInstances.remove("h3"); + toBeStoppedInstances.add("h2"); + failedPartitions = + InstanceValidationUtil.perPartitionHealthCheck(externalViews, partitionStateMap, "h1", + accessor, toBeStoppedInstances); + // Since we presume h2 as being already stopped, the health status of p2 on h2 will be skipped. 
+ Assert.assertEquals(failedPartitions.keySet().size(), 0); + } + private ExternalView prepareExternalView() { ExternalView externalView = new ExternalView(RESOURCE_NAME); externalView.getRecord() @@ -163,6 +211,22 @@ private ExternalView prepareExternalViewOffline() { return externalView; } + private ExternalView prepareExternalViewOnline() { + ExternalView externalView = new ExternalView(RESOURCE_NAME); + externalView.getRecord() + .setSimpleField(ExternalView.ExternalViewProperty.STATE_MODEL_DEF_REF.toString(), + MasterSlaveSMD.name); + externalView.setState("p1", "h1", "MASTER"); + externalView.setState("p1", "h2", "SLAVE"); + externalView.setState("p1", "h3", "SLAVE"); + + externalView.setState("p2", "h1", "MASTER"); + externalView.setState("p2", "h2", "SLAVE"); + externalView.setState("p2", "h3", "SLAVE"); + + return externalView; + } + private final class Mock { private HelixDataAccessor dataAccessor = mock(HelixDataAccessor.class); private StateModelDefinition stateModel = mock(StateModelDefinition.class); From ac851955e8dabe72d49b51da4f256d2d550e1687 Mon Sep 17 00:00:00 2001 From: Zachary Pinto Date: Fri, 3 Nov 2023 15:12:14 -0700 Subject: [PATCH 03/11] HelixAdmin APIs and pipeline changes to support Helix Node Swap (#2661) Add ability for up to 2 nodes with the same logicalId to be added to the cluster at the same time when a SWAP is happening. During all paritionAssignment for WAGED and DelayedAutoRebalancer, we select just one instance for each logicalId. Achieves n -> n+1 for all replicas on SWAP_OUT node and back n when SWAP is marked complete, making it cancelable. Adding and updating Helix Admin APIs to support swap operation: setInstanceOperation addInstance canCompleteSwap completeSwapIfPossible * Refactor sanity checks for HelixAdmin swap APIs. * Helix Node Swap pipeline changes and integration tests. * Fix integration tests to properly restore stopped MockParticipant so following tests are not affected. * Add comments and docstrings. 
* Fix tests to clean up after themselves. * Optimize duplicate logicalId filtering to only be called on allNodes and then used to remove duplicate logicalIds from enabledLiveNodes. * Add handling for clusterConfig == null in updateSwappingInstances and fix AssignableNode to check for clusterTopologyConfig when attempting to get logicalId. * Fix integ tests. * Fix testGetDomainInformation since we no longer allow an instance to join the cluster with an invalid DOMAIN field. * Add checks to ensure that the SWAP_IN instance has a matching FAULT_ZONE and matching INSTANCE_CAPACITY_MAP to SWAP_OUT node. * Rename canSwapBeCompleted to canCompleteSwap. * Add sanity checks to allow SWAP_IN node to join the cluster in disabled state before SWAP_OUT node has instance operation set. * Fix print in test case. * Add canCompleteSwap to PerInstanceAccessor and fix formatting. * Fix flaky node swap after completion by making sure replica has is computed with logicalIds intead of instanceNames. --- .../java/org/apache/helix/HelixAdmin.java | 33 +- .../BaseControllerDataProvider.java | 71 ++ .../rebalancer/DelayedAutoRebalancer.java | 75 +- .../rebalancer/util/DelayedRebalanceUtil.java | 88 ++ .../waged/GlobalRebalanceRunner.java | 12 +- .../rebalancer/waged/WagedRebalancer.java | 57 +- .../constraints/ConstraintBasedAlgorithm.java | 4 +- .../waged/model/AssignableNode.java | 18 +- .../rebalancer/waged/model/ClusterModel.java | 8 + .../waged/model/ClusterModelProvider.java | 6 +- .../apache/helix/manager/zk/ZKHelixAdmin.java | 423 ++++++++- .../apache/helix/model/InstanceConfig.java | 28 + .../rebalancer/waged/TestWagedRebalancer.java | 4 +- .../rebalancer/TestInstanceOperation.java | 896 +++++++++++++++++- .../helix/manager/zk/TestZkHelixAdmin.java | 28 +- .../org/apache/helix/mock/MockHelixAdmin.java | 10 + .../server/resources/AbstractResource.java | 2 + .../resources/helix/PerInstanceAccessor.java | 103 +- 18 files changed, 1720 insertions(+), 146 deletions(-) diff --git 
a/helix-core/src/main/java/org/apache/helix/HelixAdmin.java b/helix-core/src/main/java/org/apache/helix/HelixAdmin.java index 085a987b1e..f53b886e2b 100644 --- a/helix-core/src/main/java/org/apache/helix/HelixAdmin.java +++ b/helix-core/src/main/java/org/apache/helix/HelixAdmin.java @@ -23,6 +23,8 @@ import java.util.List; import java.util.Map; +import javax.annotation.Nullable; + import org.apache.helix.api.status.ClusterManagementMode; import org.apache.helix.api.status.ClusterManagementModeRequest; import org.apache.helix.api.topology.ClusterTopology; @@ -302,8 +304,15 @@ void enableInstance(String clusterName, String instanceName, boolean enabled, */ void enableInstance(String clusterName, List instances, boolean enabled); - void setInstanceOperation(String clusterName, String instance, - InstanceConstants.InstanceOperation instanceOperation); + /** + * Set the instanceOperation field. + * + * @param clusterName The cluster name + * @param instanceName The instance name + * @param instanceOperation The instance operation + */ + void setInstanceOperation(String clusterName, String instanceName, + @Nullable InstanceConstants.InstanceOperation instanceOperation); /** * Disable or enable a resource @@ -747,6 +756,26 @@ Map validateInstancesForWagedRebalance(String clusterName, */ boolean isEvacuateFinished(String clusterName, String instancesNames); + /** + * Check to see if swapping between two instances can be completed. Either the swapOut or + * swapIn instance can be passed in. + * @param clusterName The cluster name + * @param instanceName The instance that is being swapped out or swapped in + * @return True if the swap is ready to be completed, false otherwise. + */ + boolean canCompleteSwap(String clusterName, String instanceName); + + /** + * Check to see if swapping between two instances is ready to be completed and complete it if + * possible. Either the swapOut or swapIn instance can be passed in. 
+ * + * @param clusterName The cluster name + * @param instanceName The instance that is being swapped out or swapped in + * @return True if the swap is ready to be completed and was completed successfully, false + * otherwise. + */ + boolean completeSwapIfPossible(String clusterName, String instanceName); + /** * Return if instance is ready for preparing joining cluster. The instance should have no current state, * no pending message and tagged with operation that exclude the instance from Helix assignment. diff --git a/helix-core/src/main/java/org/apache/helix/controller/dataproviders/BaseControllerDataProvider.java b/helix-core/src/main/java/org/apache/helix/controller/dataproviders/BaseControllerDataProvider.java index 8e8f9fa9b5..9dd5173841 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/dataproviders/BaseControllerDataProvider.java +++ b/helix-core/src/main/java/org/apache/helix/controller/dataproviders/BaseControllerDataProvider.java @@ -46,10 +46,12 @@ import org.apache.helix.common.caches.PropertyCache; import org.apache.helix.common.caches.TaskCurrentStateCache; import org.apache.helix.common.controllers.ControlContextProvider; +import org.apache.helix.constants.InstanceConstants; import org.apache.helix.controller.LogUtil; import org.apache.helix.controller.rebalancer.constraint.MonitoredAbnormalResolver; import org.apache.helix.model.ClusterConfig; import org.apache.helix.model.ClusterConstraints; +import org.apache.helix.model.ClusterTopologyConfig; import org.apache.helix.model.CurrentState; import org.apache.helix.model.IdealState; import org.apache.helix.model.InstanceConfig; @@ -116,6 +118,8 @@ public class BaseControllerDataProvider implements ControlContextProvider { private Map> _idealStateRuleMap; private final Map>> _disabledInstanceForPartitionMap = new HashMap<>(); private final Set _disabledInstanceSet = new HashSet<>(); + private final Map _swapOutInstanceNameToSwapInInstanceName = new HashMap<>(); + private final Set 
_enabledLiveSwapInInstanceNames = new HashSet<>(); private final Map _abnormalStateResolverMap = new HashMap<>(); private final Set _timedOutInstanceDuringMaintenance = new HashSet<>(); private Map _liveInstanceExcludeTimedOutForMaintenance = new HashMap<>(); @@ -437,6 +441,8 @@ protected synchronized Set doRefresh(HelixDataAccesso updateIdealRuleMap(getClusterConfig()); updateDisabledInstances(getInstanceConfigMap().values(), getClusterConfig()); + updateSwappingInstances(getInstanceConfigMap().values(), getEnabledLiveInstances(), + getClusterConfig()); return refreshedTypes; } @@ -471,6 +477,8 @@ public void setClusterConfig(ClusterConfig clusterConfig) { refreshAbnormalStateResolverMap(_clusterConfig); updateIdealRuleMap(_clusterConfig); updateDisabledInstances(getInstanceConfigMap().values(), _clusterConfig); + updateSwappingInstances(getInstanceConfigMap().values(), getEnabledLiveInstances(), + _clusterConfig); } @Override @@ -617,6 +625,24 @@ public Set getDisabledInstances() { return Collections.unmodifiableSet(_disabledInstanceSet); } + /** + * Get all swapping instance pairs. + * + * @return a map of SWAP_OUT instanceNames and their corresponding SWAP_IN instanceNames. + */ + public Map getSwapOutToSwapInInstancePairs() { + return Collections.unmodifiableMap(_swapOutInstanceNameToSwapInInstanceName); + } + + /** + * Get all the enabled and live SWAP_IN instances. + * + * @return a set of SWAP_IN instanceNames that have a corresponding SWAP_OUT instance. 
+ */ + public Set getEnabledLiveSwapInInstanceNames() { + return Collections.unmodifiableSet(_enabledLiveSwapInInstanceNames); + } + public synchronized void setLiveInstances(List liveInstances) { _liveInstanceCache.setPropertyMap(HelixProperty.convertListToMap(liveInstances)); _updateInstanceOfflineTime = true; @@ -750,6 +776,8 @@ public Map getInstanceConfigMap() { public void setInstanceConfigMap(Map instanceConfigMap) { _instanceConfigCache.setPropertyMap(instanceConfigMap); updateDisabledInstances(instanceConfigMap.values(), getClusterConfig()); + updateSwappingInstances(instanceConfigMap.values(), getEnabledLiveInstances(), + getClusterConfig()); } /** @@ -858,6 +886,49 @@ private void updateDisabledInstances(Collection instanceConfigs, } } + private void updateSwappingInstances(Collection instanceConfigs, + Set liveEnabledInstances, ClusterConfig clusterConfig) { + _swapOutInstanceNameToSwapInInstanceName.clear(); + _enabledLiveSwapInInstanceNames.clear(); + + if (clusterConfig == null) { + logger.warn("Skip refreshing swapping instances because clusterConfig is null."); + return; + } + + ClusterTopologyConfig clusterTopologyConfig = + ClusterTopologyConfig.createFromClusterConfig(clusterConfig); + + Map swapOutLogicalIdsByInstanceName = new HashMap<>(); + Map swapInInstancesByLogicalId = new HashMap<>(); + instanceConfigs.forEach(instanceConfig -> { + if (instanceConfig == null) { + return; + } + if (instanceConfig.getInstanceOperation() + .equals(InstanceConstants.InstanceOperation.SWAP_OUT.name())) { + swapOutLogicalIdsByInstanceName.put(instanceConfig.getInstanceName(), + instanceConfig.getLogicalId(clusterTopologyConfig.getEndNodeType())); + } + if (instanceConfig.getInstanceOperation() + .equals(InstanceConstants.InstanceOperation.SWAP_IN.name())) { + swapInInstancesByLogicalId.put( + instanceConfig.getLogicalId(clusterTopologyConfig.getEndNodeType()), + instanceConfig.getInstanceName()); + } + }); + + 
swapOutLogicalIdsByInstanceName.forEach((swapOutInstanceName, value) -> { + String swapInInstanceName = swapInInstancesByLogicalId.get(value); + if (swapInInstanceName != null) { + _swapOutInstanceNameToSwapInInstanceName.put(swapOutInstanceName, swapInInstanceName); + if (liveEnabledInstances.contains(swapInInstanceName)) { + _enabledLiveSwapInInstanceNames.add(swapInInstanceName); + } + } + }); + } + /* * Check if the instance is timed-out during maintenance mode. An instance is timed-out if it has * been offline for longer than the user defined timeout window. diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/DelayedAutoRebalancer.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/DelayedAutoRebalancer.java index 9cd0f71cd7..442ddfb029 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/DelayedAutoRebalancer.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/DelayedAutoRebalancer.java @@ -31,17 +31,17 @@ import java.util.Optional; import java.util.Set; -import java.util.stream.Collectors; import org.apache.helix.HelixDefinedState; import org.apache.helix.api.config.StateTransitionThrottleConfig; +import org.apache.helix.constants.InstanceConstants; import org.apache.helix.controller.dataproviders.ResourceControllerDataProvider; import org.apache.helix.controller.rebalancer.constraint.MonitoredAbnormalResolver; import org.apache.helix.controller.rebalancer.util.DelayedRebalanceUtil; import org.apache.helix.controller.rebalancer.util.WagedValidationUtil; import org.apache.helix.controller.stages.CurrentStateOutput; import org.apache.helix.model.ClusterConfig; +import org.apache.helix.model.ClusterTopologyConfig; import org.apache.helix.model.IdealState; -import org.apache.helix.model.InstanceConfig; import org.apache.helix.model.Partition; import org.apache.helix.model.Resource; import org.apache.helix.model.ResourceAssignment; @@ -56,7 +56,8 @@ */ public class 
DelayedAutoRebalancer extends AbstractRebalancer { private static final Logger LOG = LoggerFactory.getLogger(DelayedAutoRebalancer.class); - public static final Set INSTANCE_OPERATION_TO_EXCLUDE_FROM_ASSIGNMENT = ImmutableSet.of("EVACUATE", "SWAP_IN"); + public static ImmutableSet INSTANCE_OPERATION_TO_EXCLUDE_FROM_ASSIGNMENT = + ImmutableSet.of(InstanceConstants.InstanceOperation.EVACUATE.name()); @Override public IdealState computeNewIdealState(String resourceName, @@ -113,9 +114,16 @@ public IdealState computeNewIdealState(String resourceName, allNodes = clusterData.getAllInstances(); } + Set allNodesDeduped = DelayedRebalanceUtil.filterOutInstancesWithDuplicateLogicalIds( + ClusterTopologyConfig.createFromClusterConfig(clusterConfig), + clusterData.getInstanceConfigMap(), allNodes); + // Remove the non-selected instances with duplicate logicalIds from liveEnabledNodes + // This ensures the same duplicate instance is kept in both allNodesDeduped and liveEnabledNodes + liveEnabledNodes.retainAll(allNodesDeduped); + long delay = DelayedRebalanceUtil.getRebalanceDelay(currentIdealState, clusterConfig); Set activeNodes = DelayedRebalanceUtil - .getActiveNodes(allNodes, currentIdealState, liveEnabledNodes, + .getActiveNodes(allNodesDeduped, currentIdealState, liveEnabledNodes, clusterData.getInstanceOfflineTimeMap(), clusterData.getLiveInstances().keySet(), clusterData.getInstanceConfigMap(), delay, clusterConfig); if (delayRebalanceEnabled) { @@ -127,11 +135,11 @@ public IdealState computeNewIdealState(String resourceName, clusterConfig, _manager); } - if (allNodes.isEmpty() || activeNodes.isEmpty()) { + if (allNodesDeduped.isEmpty() || activeNodes.isEmpty()) { LOG.error(String.format( "No instances or active instances available for resource %s, " + "allInstances: %s, liveInstances: %s, activeInstances: %s", - resourceName, allNodes, liveEnabledNodes, activeNodes)); + resourceName, allNodesDeduped, liveEnabledNodes, activeNodes)); return 
generateNewIdealState(resourceName, currentIdealState, emptyMapping(currentIdealState)); } @@ -157,41 +165,58 @@ public IdealState computeNewIdealState(String resourceName, getRebalanceStrategy(currentIdealState.getRebalanceStrategy(), allPartitions, resourceName, stateCountMap, maxPartition); - // sort node lists to ensure consistent preferred assignments - List allNodeList = new ArrayList<>(allNodes); - // We will not assign partition to instances with evacuation and wap-out tag. + List allNodeList = new ArrayList<>(allNodesDeduped); + // TODO: Currently we have 2 groups of instances and compute preference list twice and merge. // Eventually we want to have exclusive groups of instance for different instance tag. - List liveEnabledAssignableNodeList = filterOutOnOperationInstances(clusterData.getInstanceConfigMap(), - liveEnabledNodes); + List liveEnabledAssignableNodeList = new ArrayList<>( + // We will not assign partitions to instances with EVACUATE InstanceOperation. + DelayedRebalanceUtil.filterOutEvacuatingInstances(clusterData.getInstanceConfigMap(), + liveEnabledNodes)); + // sort node lists to ensure consistent preferred assignments Collections.sort(allNodeList); Collections.sort(liveEnabledAssignableNodeList); - ZNRecord newIdealMapping = _rebalanceStrategy - .computePartitionAssignment(allNodeList, liveEnabledAssignableNodeList, currentMapping, clusterData); + ZNRecord newIdealMapping = + _rebalanceStrategy.computePartitionAssignment(allNodeList, liveEnabledAssignableNodeList, + currentMapping, clusterData); ZNRecord finalMapping = newIdealMapping; if (DelayedRebalanceUtil.isDelayRebalanceEnabled(currentIdealState, clusterConfig) - || liveEnabledAssignableNodeList.size()!= activeNodes.size()) { + || liveEnabledAssignableNodeList.size() != activeNodes.size()) { List activeNodeList = new ArrayList<>(activeNodes); Collections.sort(activeNodeList); int minActiveReplicas = DelayedRebalanceUtil.getMinActiveReplica( 
ResourceConfig.mergeIdealStateWithResourceConfig(resourceConfig, currentIdealState), currentIdealState, replicaCount); - ZNRecord newActiveMapping = _rebalanceStrategy - .computePartitionAssignment(allNodeList, activeNodeList, currentMapping, clusterData); + ZNRecord newActiveMapping = + _rebalanceStrategy.computePartitionAssignment(allNodeList, activeNodeList, currentMapping, + clusterData); finalMapping = getFinalDelayedMapping(currentIdealState, newIdealMapping, newActiveMapping, liveEnabledNodes, replicaCount, minActiveReplicas); } finalMapping.getListFields().putAll(userDefinedPreferenceList); + // 1. Get all SWAP_OUT instances and corresponding SWAP_IN instance pairs in the cluster. + Map swapOutToSwapInInstancePairs = + clusterData.getSwapOutToSwapInInstancePairs(); + // 2. Get all enabled and live SWAP_IN instances in the cluster. + Set enabledLiveSwapInInstances = clusterData.getEnabledLiveSwapInInstanceNames(); + // 3. For each SWAP_OUT instance in any of the preferenceLists, add the corresponding SWAP_IN instance to the end. + // Skipping this when there are not SWAP_IN instances ready(enabled and live) will reduce computation time when there is not an active + // swap occurring. 
+ if (!clusterData.getEnabledLiveSwapInInstanceNames().isEmpty()) { + DelayedRebalanceUtil.addSwapInInstanceToPreferenceListsIfSwapOutInstanceExists(finalMapping, + swapOutToSwapInInstancePairs, enabledLiveSwapInInstances); + } + LOG.debug("currentMapping: {}", currentMapping); LOG.debug("stateCountMap: {}", stateCountMap); LOG.debug("liveEnabledNodes: {}", liveEnabledNodes); LOG.debug("activeNodes: {}", activeNodes); - LOG.debug("allNodes: {}", allNodes); + LOG.debug("allNodes: {}", allNodesDeduped); LOG.debug("maxPartition: {}", maxPartition); LOG.debug("newIdealMapping: {}", newIdealMapping); LOG.debug("finalMapping: {}", finalMapping); @@ -201,14 +226,6 @@ public IdealState computeNewIdealState(String resourceName, return idealState; } - private static List filterOutOnOperationInstances(Map instanceConfigMap, - Set nodes) { - return nodes.stream() - .filter( - instance -> !INSTANCE_OPERATION_TO_EXCLUDE_FROM_ASSIGNMENT.contains(instanceConfigMap.get(instance).getInstanceOperation())) - .collect(Collectors.toList()); - } - private IdealState generateNewIdealState(String resourceName, IdealState currentIdealState, ZNRecord newMapping) { IdealState newIdealState = new IdealState(resourceName); @@ -376,7 +393,7 @@ protected Map computeBestPossibleStateForPartition(Set l // if preference list is not empty, and we do have new intanceToAdd, we // should check if it has capacity to hold the partition. 
boolean isWaged = WagedValidationUtil.isWagedEnabled(idealState) && cache != null; - if (isWaged && !isPreferenceListEmpty && instanceToAdd.size() > 0) { + if (isWaged && !isPreferenceListEmpty && !instanceToAdd.isEmpty()) { // check instanceToAdd instance appears in combinedPreferenceList for (String instance : instanceToAdd) { if (combinedPreferenceList.contains(instance)) { @@ -409,7 +426,11 @@ protected Map computeBestPossibleStateForPartition(Set l bestPossibleStateMap, preferenceList, combinedPreferenceList)) { for (int i = 0; i < combinedPreferenceList.size() - numReplicas; i++) { String instanceToDrop = combinedPreferenceList.get(combinedPreferenceList.size() - i - 1); - bestPossibleStateMap.put(instanceToDrop, HelixDefinedState.DROPPED.name()); + // We do not want to drop a SWAP_IN node if it is at the end of the preferenceList, + // because partitions are actively being added on this node to prepare for SWAP completion. + if (cache == null || !cache.getEnabledLiveSwapInInstanceNames().contains(instanceToDrop)) { + bestPossibleStateMap.put(instanceToDrop, HelixDefinedState.DROPPED.name()); + } } } diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/util/DelayedRebalanceUtil.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/util/DelayedRebalanceUtil.java index 58bad164a5..c7066d053d 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/util/DelayedRebalanceUtil.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/util/DelayedRebalanceUtil.java @@ -34,12 +34,14 @@ import org.apache.helix.controller.rebalancer.waged.model.AssignableReplica; import org.apache.helix.controller.rebalancer.waged.model.ClusterModelProvider; import org.apache.helix.model.ClusterConfig; +import org.apache.helix.model.ClusterTopologyConfig; import org.apache.helix.model.IdealState; import org.apache.helix.model.InstanceConfig; import org.apache.helix.model.Partition; import 
org.apache.helix.model.ResourceAssignment; import org.apache.helix.model.ResourceConfig; import org.apache.helix.util.InstanceValidationUtil; +import org.apache.helix.zookeeper.datamodel.ZNRecord; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -139,6 +141,92 @@ public static Set filterOutEvacuatingInstances(Map filterOutInstancesWithDuplicateLogicalIds( + ClusterTopologyConfig clusterTopologyConfig, Map instanceConfigMap, + Set instances) { + Set filteredNodes = new HashSet<>(); + Map filteredInstancesByLogicalId = new HashMap<>(); + + instances.forEach(node -> { + InstanceConfig thisInstanceConfig = instanceConfigMap.get(node); + if (thisInstanceConfig == null) { + return; + } + String thisLogicalId = + thisInstanceConfig.getLogicalId(clusterTopologyConfig.getEndNodeType()); + + if (filteredInstancesByLogicalId.containsKey(thisLogicalId)) { + InstanceConfig filteredDuplicateInstanceConfig = + instanceConfigMap.get(filteredInstancesByLogicalId.get(thisLogicalId)); + if ((filteredDuplicateInstanceConfig.getInstanceOperation() + .equals(InstanceConstants.InstanceOperation.SWAP_IN.name()) + && thisInstanceConfig.getInstanceOperation() + .equals(InstanceConstants.InstanceOperation.SWAP_OUT.name())) + || thisInstanceConfig.getInstanceOperation().isEmpty()) { + // If the already filtered instance is SWAP_IN and this instance is in SWAP_OUT, then replace the filtered + // instance with this instance. If this instance has no InstanceOperation, then replace the filtered instance + // with this instance. This is the case where the SWAP_IN node has been marked as complete or SWAP_IN exists and + // SWAP_OUT does not. There can never be a case where both have no InstanceOperation set. 
+ filteredNodes.remove(filteredInstancesByLogicalId.get(thisLogicalId)); + filteredNodes.add(node); + filteredInstancesByLogicalId.put(thisLogicalId, node); + } + } else { + filteredNodes.add(node); + filteredInstancesByLogicalId.put(thisLogicalId, node); + } + }); + + return filteredNodes; + } + + /** + * Look through the provided mapping and add corresponding SWAP_IN node if a SWAP_OUT node exists + * in the partition's preference list. + * + * @param mapping the mapping to be updated (IdealState ZNRecord) + * @param swapOutToSwapInInstancePairs the map of SWAP_OUT to SWAP_IN instances + */ + public static void addSwapInInstanceToPreferenceListsIfSwapOutInstanceExists(ZNRecord mapping, + Map swapOutToSwapInInstancePairs, Set enabledLiveSwapInInstances) { + Map> preferenceListsByPartition = mapping.getListFields(); + for (String partition : preferenceListsByPartition.keySet()) { + List preferenceList = preferenceListsByPartition.get(partition); + if (preferenceList == null) { + continue; + } + List newInstancesToAdd = new ArrayList<>(); + for (String instanceName : preferenceList) { + if (swapOutToSwapInInstancePairs.containsKey(instanceName) + && enabledLiveSwapInInstances.contains( + swapOutToSwapInInstancePairs.get(instanceName))) { + String swapInInstanceName = swapOutToSwapInInstancePairs.get(instanceName); + if (!preferenceList.contains(swapInInstanceName) && !newInstancesToAdd.contains( + swapInInstanceName)) { + newInstancesToAdd.add(swapInInstanceName); + } + } + } + if (!newInstancesToAdd.isEmpty()) { + preferenceList.addAll(newInstancesToAdd); + } + } + } + /** * Return the time when an offline or disabled instance should be treated as inactive. Return -1 * if it is inactive now or forced to be rebalanced by an on-demand rebalance. 
diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/GlobalRebalanceRunner.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/GlobalRebalanceRunner.java index 6130e5c522..6c199bc1be 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/GlobalRebalanceRunner.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/GlobalRebalanceRunner.java @@ -30,10 +30,12 @@ import org.apache.helix.HelixRebalanceException; import org.apache.helix.controller.changedetector.ResourceChangeDetector; import org.apache.helix.controller.dataproviders.ResourceControllerDataProvider; +import org.apache.helix.controller.rebalancer.util.DelayedRebalanceUtil; import org.apache.helix.controller.rebalancer.util.WagedRebalanceUtil; import org.apache.helix.controller.rebalancer.waged.model.ClusterModel; import org.apache.helix.controller.rebalancer.waged.model.ClusterModelProvider; import org.apache.helix.controller.stages.CurrentStateOutput; +import org.apache.helix.model.ClusterTopologyConfig; import org.apache.helix.model.Resource; import org.apache.helix.model.ResourceAssignment; import org.apache.helix.monitoring.metrics.MetricCollector; @@ -163,8 +165,14 @@ private void doGlobalRebalance(ResourceControllerDataProvider clusterData, Map computeBestPossibleStates( ResourceControllerDataProvider clusterData, Map resourceMap, final CurrentStateOutput currentStateOutput, RebalanceAlgorithm algorithm) throws HelixRebalanceException { - Set activeNodes = DelayedRebalanceUtil - .getActiveNodes(clusterData.getAllInstances(), clusterData.getEnabledLiveInstances(), + + Set allNodesDeduped = DelayedRebalanceUtil.filterOutInstancesWithDuplicateLogicalIds( + ClusterTopologyConfig.createFromClusterConfig(clusterData.getClusterConfig()), + clusterData.getInstanceConfigMap(), clusterData.getAllInstances()); + // Remove the non-selected instances with duplicate logicalIds from liveEnabledNodes + // 
This ensures the same duplicate instance is kept in both allNodesDeduped and liveEnabledNodes + Set liveEnabledNodesDeduped = clusterData.getEnabledLiveInstances(); + liveEnabledNodesDeduped.retainAll(allNodesDeduped); + + Set activeNodes = + DelayedRebalanceUtil.getActiveNodes(allNodesDeduped, liveEnabledNodesDeduped, clusterData.getInstanceOfflineTimeMap(), clusterData.getLiveInstances().keySet(), clusterData.getInstanceConfigMap(), clusterData.getClusterConfig()); @@ -359,6 +368,20 @@ private Map convertResourceAssignment( // Sort the preference list according to state priority. newIdealState.setPreferenceLists( getPreferenceLists(assignments.get(resourceName), statePriorityMap)); + + // 1. Get all SWAP_OUT instances and corresponding SWAP_IN instance pairs in the cluster. + Map swapOutToSwapInInstancePairs = + clusterData.getSwapOutToSwapInInstancePairs(); + // 2. Get all enabled and live SWAP_IN instances in the cluster. + Set enabledLiveSwapInInstances = clusterData.getEnabledLiveSwapInInstanceNames(); + // 3. For each SWAP_OUT instance in any of the preferenceLists, add the corresponding SWAP_IN instance to the end. + // Skipping this when there are not SWAP_IN instances ready(enabled and live) will reduce computation time when there is not an active + // swap occurring. + if (!clusterData.getEnabledLiveSwapInInstanceNames().isEmpty()) { + DelayedRebalanceUtil.addSwapInInstanceToPreferenceListsIfSwapOutInstanceExists( + newIdealState.getRecord(), swapOutToSwapInInstancePairs, enabledLiveSwapInInstances); + } + // Note the state mapping in the new assignment won't directly propagate to the map fields. // The rebalancer will calculate for the final state mapping considering the current states. 
finalIdealStateMap.put(resourceName, newIdealState); @@ -398,7 +421,14 @@ private Map handleDelayedRebalanceMinActiveReplica( RebalanceAlgorithm algorithm) throws HelixRebalanceException { // the "real" live nodes at the time // TODO: this is a hacky way to filter our on operation instance. We should consider redesign `getEnabledLiveInstances()`. - final Set enabledLiveInstances = filterOutOnOperationInstances(clusterData.getInstanceConfigMap(), clusterData.getEnabledLiveInstances()); + final Set allNodesDeduped = DelayedRebalanceUtil.filterOutInstancesWithDuplicateLogicalIds( + ClusterTopologyConfig.createFromClusterConfig(clusterData.getClusterConfig()), + clusterData.getInstanceConfigMap(), clusterData.getAllInstances()); + final Set enabledLiveInstances = clusterData.getEnabledLiveInstances(); + // Remove the non-selected instances with duplicate logicalIds from liveEnabledNodes + // This ensures the same duplicate instance is kept in both allNodesDeduped and liveEnabledNodes + enabledLiveInstances.retainAll(allNodesDeduped); + if (activeNodes.equals(enabledLiveInstances) || !requireRebalanceOverwrite(clusterData, currentResourceAssignment)) { // no need for additional process, return the current resource assignment return currentResourceAssignment; @@ -427,14 +457,6 @@ private Map handleDelayedRebalanceMinActiveReplica( } } - private static Set filterOutOnOperationInstances(Map instanceConfigMap, - Set nodes) { - return nodes.stream() - .filter( - instance -> !DelayedAutoRebalancer.INSTANCE_OPERATION_TO_EXCLUDE_FROM_ASSIGNMENT.contains(instanceConfigMap.get(instance).getInstanceOperation())) - .collect(Collectors.toSet()); - } - /** * Emergency rebalance is scheduled to quickly handle urgent cases like reassigning partitions from inactive nodes * and addressing for partitions failing to meet minActiveReplicas. 
@@ -619,8 +641,15 @@ protected boolean requireRebalanceOverwrite(ResourceControllerDataProvider clust bestPossibleAssignment.values().parallelStream().forEach((resourceAssignment -> { String resourceName = resourceAssignment.getResourceName(); IdealState currentIdealState = clusterData.getIdealState(resourceName); - Set enabledLiveInstances = - filterOutOnOperationInstances(clusterData.getInstanceConfigMap(), clusterData.getEnabledLiveInstances()); + + Set allNodesDeduped = DelayedRebalanceUtil.filterOutInstancesWithDuplicateLogicalIds( + ClusterTopologyConfig.createFromClusterConfig(clusterData.getClusterConfig()), + clusterData.getInstanceConfigMap(), clusterData.getAllInstances()); + Set enabledLiveInstances = clusterData.getEnabledLiveInstances(); + // Remove the non-selected instances with duplicate logicalIds from liveEnabledNodes + // This ensures the same duplicate instance is kept in both allNodesDeduped and liveEnabledNodes + enabledLiveInstances.retainAll(allNodesDeduped); + int numReplica = currentIdealState.getReplicaCount(enabledLiveInstances.size()); int minActiveReplica = DelayedRebalanceUtil.getMinActiveReplica(ResourceConfig .mergeIdealStateWithResourceConfig(clusterData.getResourceConfig(resourceName), diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ConstraintBasedAlgorithm.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ConstraintBasedAlgorithm.java index 5dbeb5c38a..77d56302c1 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ConstraintBasedAlgorithm.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ConstraintBasedAlgorithm.java @@ -153,7 +153,7 @@ private Optional getNodeWithHighestPoints(AssignableReplica repl int idleScore1 = busyInstances.contains(instanceName1) ? 0 : 1; int idleScore2 = busyInstances.contains(instanceName2) ? 0 : 1; return idleScore1 != idleScore2 ? 
(idleScore1 - idleScore2) - : -instanceName1.compareTo(instanceName2); + : -nodeEntry1.getKey().compareTo(nodeEntry2.getKey()); } else { return scoreCompareResult; } @@ -193,7 +193,7 @@ private static class AssignableReplicaWithScore implements Comparable { // Immutable Instance Properties private final String _instanceName; + private final String _logicaId; private final String _faultZone; // maximum number of the partitions that can be assigned to the instance. private final int _maxPartition; @@ -72,8 +74,12 @@ public class AssignableNode implements Comparable { * ResourceConfig could * subject to change. If the assumption is no longer true, this function should become private. */ - AssignableNode(ClusterConfig clusterConfig, InstanceConfig instanceConfig, String instanceName) { + AssignableNode(ClusterConfig clusterConfig, ClusterTopologyConfig clusterTopologyConfig, + InstanceConfig instanceConfig, String instanceName) { _instanceName = instanceName; + _logicaId = clusterTopologyConfig != null ? instanceConfig.getLogicalId( + clusterTopologyConfig.getEndNodeType()) + : instanceName; Map instanceCapacity = fetchInstanceCapacity(clusterConfig, instanceConfig); _faultZone = computeFaultZone(clusterConfig, instanceConfig); _instanceTags = ImmutableSet.copyOf(instanceConfig.getTags()); @@ -86,6 +92,10 @@ public class AssignableNode implements Comparable { _currentAssignedReplicaMap = new HashMap<>(); } + AssignableNode(ClusterConfig clusterConfig, InstanceConfig instanceConfig, String instanceName) { + this(clusterConfig, null, instanceConfig, instanceName); + } + /** * This function should only be used to assign a set of new partitions that are not allocated on * this node. 
It's because the any exception could occur at the middle of batch assignment and the @@ -272,6 +282,10 @@ public String getInstanceName() { return _instanceName; } + public String getLogicalId() { + return _logicaId; + } + public Set getInstanceTags() { return _instanceTags; } @@ -368,7 +382,7 @@ public int hashCode() { @Override public int compareTo(AssignableNode o) { - return _instanceName.compareTo(o.getInstanceName()); + return _logicaId.compareTo(o.getLogicalId()); } @Override diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/model/ClusterModel.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/model/ClusterModel.java index 42eaabaf93..7ef503e013 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/model/ClusterModel.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/model/ClusterModel.java @@ -37,6 +37,7 @@ public class ClusterModel { // Note that the identical replicas are deduped in the index. private final Map> _assignableReplicaIndex; private final Map _assignableNodeMap; + private final Set _assignableNodeLogicalIds; /** * @param clusterContext The initialized cluster context. 
@@ -60,6 +61,9 @@ public class ClusterModel { _assignableNodeMap = assignableNodes.parallelStream() .collect(Collectors.toMap(AssignableNode::getInstanceName, node -> node)); + _assignableNodeLogicalIds = + assignableNodes.parallelStream().map(AssignableNode::getLogicalId) + .collect(Collectors.toSet()); } public ClusterContext getContext() { @@ -70,6 +74,10 @@ public Map getAssignableNodes() { return _assignableNodeMap; } + public Set getAssignableLogicalIds() { + return _assignableNodeLogicalIds; + } + public Map> getAssignableReplicaMap() { return _assignableReplicaMap; } diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/model/ClusterModelProvider.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/model/ClusterModelProvider.java index dffaec3e04..3f16732107 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/model/ClusterModelProvider.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/model/ClusterModelProvider.java @@ -34,6 +34,7 @@ import org.apache.helix.controller.dataproviders.ResourceControllerDataProvider; import org.apache.helix.controller.rebalancer.util.DelayedRebalanceUtil; import org.apache.helix.model.ClusterConfig; +import org.apache.helix.model.ClusterTopologyConfig; import org.apache.helix.model.IdealState; import org.apache.helix.model.InstanceConfig; import org.apache.helix.model.Partition; @@ -530,9 +531,12 @@ public static Map>> getStateInstanceMap( */ private static Set getAllAssignableNodes(ClusterConfig clusterConfig, Map instanceConfigMap, Set activeInstances) { + ClusterTopologyConfig clusterTopologyConfig = + ClusterTopologyConfig.createFromClusterConfig(clusterConfig); return activeInstances.parallelStream() .filter(instanceConfigMap::containsKey).map( - instanceName -> new AssignableNode(clusterConfig, instanceConfigMap.get(instanceName), + instanceName -> new AssignableNode(clusterConfig, clusterTopologyConfig, + 
instanceConfigMap.get(instanceName), instanceName)).collect(Collectors.toSet()); } diff --git a/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java b/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java index ebbcf64d02..34bd564878 100644 --- a/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java +++ b/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java @@ -39,6 +39,10 @@ import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; +import javax.annotation.Nullable; + +import com.google.common.collect.ImmutableSet; +import org.apache.commons.lang3.builder.EqualsBuilder; import org.apache.helix.AccessOption; import org.apache.helix.BaseDataAccessor; import org.apache.helix.ConfigAccessor; @@ -47,7 +51,6 @@ import org.apache.helix.HelixDataAccessor; import org.apache.helix.HelixDefinedState; import org.apache.helix.HelixException; -import org.apache.helix.HelixProperty; import org.apache.helix.InstanceType; import org.apache.helix.PropertyKey; import org.apache.helix.PropertyPathBuilder; @@ -67,6 +70,7 @@ import org.apache.helix.model.ClusterConstraints; import org.apache.helix.model.ClusterConstraints.ConstraintType; import org.apache.helix.model.ClusterStatus; +import org.apache.helix.model.ClusterTopologyConfig; import org.apache.helix.model.ConstraintItem; import org.apache.helix.model.ControllerHistory; import org.apache.helix.model.CurrentState; @@ -86,6 +90,7 @@ import org.apache.helix.model.PauseSignal; import org.apache.helix.model.ResourceConfig; import org.apache.helix.model.StateModelDefinition; +import org.apache.helix.model.builder.HelixConfigScopeBuilder; import org.apache.helix.msdcommon.exception.InvalidRoutingDataException; import org.apache.helix.tools.DefaultIdealStateCalculator; import org.apache.helix.util.ConfigStringUtil; @@ -114,6 +119,8 @@ public class ZKHelixAdmin implements HelixAdmin { public static final String CONNECTION_TIMEOUT = 
"helixAdmin.timeOutInSec"; private static final String MAINTENANCE_ZNODE_ID = "maintenance"; private static final int DEFAULT_SUPERCLUSTER_REPLICA = 3; + private static final ImmutableSet ALLOWED_INSTANCE_OPERATIONS_FOR_ADD_INSTANCE = + ImmutableSet.of("", InstanceConstants.InstanceOperation.SWAP_IN.name()); private final RealmAwareZkClient _zkClient; private final ConfigAccessor _configAccessor; @@ -197,6 +204,108 @@ public void addInstance(String clusterName, InstanceConfig instanceConfig) { throw new HelixException("Node " + nodeId + " already exists in cluster " + clusterName); } + if (!ALLOWED_INSTANCE_OPERATIONS_FOR_ADD_INSTANCE.contains( + instanceConfig.getInstanceOperation())) { + throw new HelixException( + "Instance can only be added if InstanceOperation is set to one of" + "the following: " + + ALLOWED_INSTANCE_OPERATIONS_FOR_ADD_INSTANCE + " This instance: " + nodeId + + " has InstanceOperation set to " + instanceConfig.getInstanceOperation()); + } + + // Get the topology key used to determine the logicalId of a node. 
+ ClusterConfig clusterConfig = _configAccessor.getClusterConfig(clusterName); + ClusterTopologyConfig clusterTopologyConfig = + ClusterTopologyConfig.createFromClusterConfig(clusterConfig); + String logicalIdKey = clusterTopologyConfig.getEndNodeType(); + String faultZoneKey = clusterTopologyConfig.getFaultZoneType(); + String toAddInstanceLogicalId = instanceConfig.getLogicalId(logicalIdKey); + + HelixConfigScope instanceConfigScope = + new HelixConfigScopeBuilder(HelixConfigScope.ConfigScopeProperty.PARTICIPANT, + clusterName).build(); + List existingInstanceIds = getConfigKeys(instanceConfigScope); + List foundInstanceConfigsWithMatchingLogicalId = + existingInstanceIds.parallelStream() + .map(existingInstanceId -> getInstanceConfig(clusterName, existingInstanceId)).filter( + existingInstanceConfig -> existingInstanceConfig.getLogicalId(logicalIdKey) + .equals(toAddInstanceLogicalId)).collect(Collectors.toList()); + + if (foundInstanceConfigsWithMatchingLogicalId.size() >= 2) { + // If the length is 2, we cannot add an instance with the same logicalId as an existing instance + // regardless of InstanceOperation. + throw new HelixException( + "There can only be 2 instances with the same logicalId in a cluster. " + + "Existing instances: " + foundInstanceConfigsWithMatchingLogicalId.get(0) + .getInstanceName() + " and " + foundInstanceConfigsWithMatchingLogicalId.get(1) + .getInstanceName() + " already have the same logicalId: " + toAddInstanceLogicalId + + "; therefore, " + nodeId + " cannot be added to the cluster."); + } else if (foundInstanceConfigsWithMatchingLogicalId.size() == 1) { + // If there is only one instance with the same logicalId, we can infer that the intended behaviour + // is to SWAP_IN. + + // If the InstanceOperation is unset, we will set it to SWAP_IN. 
+ if (!instanceConfig.getInstanceOperation() + .equals(InstanceConstants.InstanceOperation.SWAP_IN.name())) { + instanceConfig.setInstanceOperation(InstanceConstants.InstanceOperation.SWAP_IN); + } + + // If the existing instance with the same logicalId does not have InstanceOperation set to SWAP_OUT and this instance + // is attempting to join as enabled, we cannot add this instance. + if (instanceConfig.getInstanceEnabled() && !foundInstanceConfigsWithMatchingLogicalId.get(0) + .getInstanceOperation() + .equals(InstanceConstants.InstanceOperation.SWAP_OUT.name())) { + throw new HelixException( + "Instance can only be added if the exising instance sharing the same logicalId has InstanceOperation" + + " set to " + InstanceConstants.InstanceOperation.SWAP_OUT.name() + + " and this instance has InstanceOperation set to " + + InstanceConstants.InstanceOperation.SWAP_IN.name() + ". " + "Existing instance: " + + foundInstanceConfigsWithMatchingLogicalId.get(0).getInstanceName() + + " has InstanceOperation: " + + foundInstanceConfigsWithMatchingLogicalId.get(0).getInstanceOperation() + + " and this instance: " + nodeId + " has InstanceOperation: " + + instanceConfig.getInstanceOperation()); + } + + // If the existing instance with the same logicalId is not in the same FAULT_ZONE as this instance, we cannot + // add this instance. + if (!foundInstanceConfigsWithMatchingLogicalId.get(0).getDomainAsMap() + .containsKey(faultZoneKey) || !instanceConfig.getDomainAsMap().containsKey(faultZoneKey) + || !foundInstanceConfigsWithMatchingLogicalId.get(0).getDomainAsMap().get(faultZoneKey) + .equals(instanceConfig.getDomainAsMap().get(faultZoneKey))) { + throw new HelixException( + "Instance can only be added if the SWAP_OUT instance sharing the same logicalId is in the same FAULT_ZONE" + + " as this instance. 
" + "Existing instance: " + + foundInstanceConfigsWithMatchingLogicalId.get(0).getInstanceName() + + " has FAULT_ZONE_TYPE: " + foundInstanceConfigsWithMatchingLogicalId.get(0) + .getDomainAsMap().get(faultZoneKey) + " and this instance: " + nodeId + + " has FAULT_ZONE_TYPE: " + instanceConfig.getDomainAsMap().get(faultZoneKey)); + } + + Map foundInstanceCapacityMap = + foundInstanceConfigsWithMatchingLogicalId.get(0).getInstanceCapacityMap().isEmpty() + ? clusterConfig.getDefaultInstanceCapacityMap() + : foundInstanceConfigsWithMatchingLogicalId.get(0).getInstanceCapacityMap(); + Map instanceCapacityMap = instanceConfig.getInstanceCapacityMap().isEmpty() + ? clusterConfig.getDefaultInstanceCapacityMap() : instanceConfig.getInstanceCapacityMap(); + // If the instance does not have the same capacity, we cannot add this instance. + if (!new EqualsBuilder().append(foundInstanceCapacityMap, instanceCapacityMap).isEquals()) { + throw new HelixException( + "Instance can only be added if the SWAP_OUT instance sharing the same logicalId has the same capacity" + + " as this instance. " + "Existing instance: " + + foundInstanceConfigsWithMatchingLogicalId.get(0).getInstanceName() + + " has capacity: " + foundInstanceCapacityMap + " and this instance: " + nodeId + + " has capacity: " + instanceCapacityMap); + } + } else if (!instanceConfig.getInstanceOperation().isEmpty()) { + // If there are no instances with the same logicalId, we can only add this instance if InstanceOperation + // is unset because it is a new instance. 
+ throw new HelixException( + "There is no instance with logicalId: " + toAddInstanceLogicalId + " in cluster: " + + clusterName + "; therefore, " + nodeId + + " cannot join cluster with InstanceOperation set to " + + instanceConfig.getInstanceOperation() + "."); + } + ZKUtil.createChildren(_zkClient, instanceConfigsPath, instanceConfig.getRecord()); _zkClient.createPersistent(PropertyPathBuilder.instanceMessage(clusterName, nodeId), true); @@ -358,6 +467,21 @@ public void enableInstance(final String clusterName, final String instanceName, logger.info("{} instance {} in cluster {}.", enabled ? "Enable" : "Disable", instanceName, clusterName); BaseDataAccessor baseAccessor = new ZkBaseDataAccessor<>(_zkClient); + + // If enabled is set to true and InstanceOperation is SWAP_IN, we should fail if there is not a + // matching SWAP_OUT instance. + InstanceConfig instanceConfig = getInstanceConfig(clusterName, instanceName); + if (enabled && instanceConfig.getInstanceOperation() + .equals(InstanceConstants.InstanceOperation.SWAP_IN.name())) { + InstanceConfig matchingSwapInstance = findMatchingSwapInstance(clusterName, instanceConfig); + if (matchingSwapInstance == null || !matchingSwapInstance.getInstanceOperation() + .equals(InstanceConstants.InstanceOperation.SWAP_OUT.name())) { + throw new HelixException("Instance cannot be enabled if InstanceOperation is set to " + + instanceConfig.getInstanceOperation() + " when there is no matching " + + InstanceConstants.InstanceOperation.SWAP_OUT.name() + " instance."); + } + } + // Eventually we will have all instances' enable/disable information in clusterConfig. Now we // update both instanceConfig and clusterConfig in transition period. 
enableSingleInstance(clusterName, instanceName, enabled, baseAccessor, disabledType, reason); @@ -379,11 +503,53 @@ public void enableInstance(String clusterName, List instances, boolean e @Override // TODO: Name may change in future public void setInstanceOperation(String clusterName, String instanceName, - InstanceConstants.InstanceOperation instanceOperation) { + @Nullable InstanceConstants.InstanceOperation instanceOperation) { BaseDataAccessor baseAccessor = new ZkBaseDataAccessor<>(_zkClient); String path = PropertyPathBuilder.instanceConfig(clusterName, instanceName); + // InstanceOperation can only be set to SWAP_IN when the instance is added to the cluster + // or if it is disabled. + if (instanceOperation != null && instanceOperation.equals( + InstanceConstants.InstanceOperation.SWAP_IN) && getInstanceConfig(clusterName, + instanceName).getInstanceEnabled()) { + throw new HelixException("InstanceOperation should only be set to " + + InstanceConstants.InstanceOperation.SWAP_IN.name() + + " when an instance joins the cluster for the first time(when " + + "creating the InstanceConfig) or is disabled."); + } + + // InstanceOperation cannot be set to null if there is an instance with the same logicalId in + // the cluster which does not have InstanceOperation set to SWAP_IN or SWAP_OUT. 
+ if (instanceOperation == null) { + InstanceConfig instanceConfig = getInstanceConfig(clusterName, instanceName); + String logicalIdKey = ClusterTopologyConfig.createFromClusterConfig( + _configAccessor.getClusterConfig(clusterName)).getEndNodeType(); + + HelixConfigScope instanceConfigScope = + new HelixConfigScopeBuilder(HelixConfigScope.ConfigScopeProperty.PARTICIPANT, + clusterName).build(); + List existingInstanceIds = getConfigKeys(instanceConfigScope); + List matchingInstancesWithNonSwappingInstanceOperation = + existingInstanceIds.parallelStream() + .map(existingInstanceId -> getInstanceConfig(clusterName, existingInstanceId)).filter( + existingInstanceConfig -> + !existingInstanceConfig.getInstanceName().equals(instanceName) + && existingInstanceConfig.getLogicalId(logicalIdKey) + .equals(instanceConfig.getLogicalId(logicalIdKey)) + && !existingInstanceConfig.getInstanceOperation() + .equals(InstanceConstants.InstanceOperation.SWAP_IN.name()) + && !existingInstanceConfig.getInstanceOperation() + .equals(InstanceConstants.InstanceOperation.SWAP_OUT.name())) + .collect(Collectors.toList()); + + if (!matchingInstancesWithNonSwappingInstanceOperation.isEmpty()) { + throw new HelixException("InstanceOperation cannot be set to null for " + instanceName + + " if there are other instances with the same logicalId in the cluster that do not have" + + " InstanceOperation set to SWAP_IN or SWAP_OUT."); + } + } + if (!baseAccessor.exists(path, 0)) { throw new HelixException( "Cluster " + clusterName + ", instance: " + instanceName + ", instance config does not exist"); @@ -410,16 +576,263 @@ public ZNRecord update(ZNRecord currentData) { @Override public boolean isEvacuateFinished(String clusterName, String instanceName) { - if (!instanceHasCurrentSateOrMessage(clusterName, instanceName)) { + if (!instanceHasCurrentStateOrMessage(clusterName, instanceName)) { InstanceConfig config = getInstanceConfig(clusterName, instanceName); return config != null && 
config.getInstanceOperation().equals(InstanceConstants.InstanceOperation.EVACUATE.name()); } return false; } + /** + * Find the instance that the passed instance is swapping with. If the passed instance has + * SWAP_OUT instanceOperation, then find the corresponding instance that has SWAP_IN + * instanceOperation. If the passed instance has SWAP_IN instanceOperation, then find the + * corresponding instance that has SWAP_OUT instanceOperation. + * + * @param clusterName The cluster name + * @param instanceConfig The instance to find the swap instance for + * @return The swap instance if found, null otherwise. + */ + @Nullable + private InstanceConfig findMatchingSwapInstance(String clusterName, + InstanceConfig instanceConfig) { + String logicalIdKey = + ClusterTopologyConfig.createFromClusterConfig(_configAccessor.getClusterConfig(clusterName)) + .getEndNodeType(); + + for (String potentialSwappingInstance : getConfigKeys( + new HelixConfigScopeBuilder(HelixConfigScope.ConfigScopeProperty.PARTICIPANT, + clusterName).build())) { + InstanceConfig potentialSwappingInstanceConfig = + getInstanceConfig(clusterName, potentialSwappingInstance); + + // Return if there is a matching Instance with the same logicalId and opposite InstanceOperation swap operation. 
+ if (potentialSwappingInstanceConfig.getLogicalId(logicalIdKey) + .equals(instanceConfig.getLogicalId(logicalIdKey)) && ( + instanceConfig.getInstanceOperation() + .equals(InstanceConstants.InstanceOperation.SWAP_IN.name()) + && potentialSwappingInstanceConfig.getInstanceOperation() + .equals(InstanceConstants.InstanceOperation.SWAP_OUT.name())) || ( + instanceConfig.getInstanceOperation() + .equals(InstanceConstants.InstanceOperation.SWAP_OUT.name()) + && potentialSwappingInstanceConfig.getInstanceOperation() + .equals(InstanceConstants.InstanceOperation.SWAP_IN.name()))) { + return potentialSwappingInstanceConfig; + } + } + + return null; + } + + /** + * Check to see if swapping between two instances is ready to be completed. Checks: 1. Both + * instances must be alive. 2. Both instances must only have one session and not be carrying over + * from a previous session. 3. Both instances must have no pending messages. 4. Both instances + * cannot have partitions in the ERROR state 5. SwapIn instance must have correct state for all + * partitions that are currently assigned to the SwapOut instance. + * TODO: We may want to make this a public API in the future. + * + * @param clusterName The cluster name + * @param swapOutInstanceName The instance that is being swapped out + * @param swapInInstanceName The instance that is being swapped in + * @return True if the swap is ready to be completed, false otherwise. + */ + private boolean canCompleteSwap(String clusterName, String swapOutInstanceName, + String swapInInstanceName) { + BaseDataAccessor baseAccessor = new ZkBaseDataAccessor(_zkClient); + HelixDataAccessor accessor = new ZKHelixDataAccessor(clusterName, baseAccessor); + PropertyKey.Builder keyBuilder = accessor.keyBuilder(); + + // 1. Check that both instances are alive. 
+ LiveInstance swapOutLiveInstance = + accessor.getProperty(keyBuilder.liveInstance(swapOutInstanceName)); + LiveInstance swapInLiveInstance = + accessor.getProperty(keyBuilder.liveInstance(swapInInstanceName)); + if (swapOutLiveInstance == null || swapInLiveInstance == null) { + logger.warn( + "SwapOutInstance {} is {} and SwapInInstance {} is {} for cluster {}. Swap will not complete unless both instances are ONLINE.", + swapOutInstanceName, swapOutLiveInstance != null ? "ONLINE" : "OFFLINE", + swapInInstanceName, swapInLiveInstance != null ? "ONLINE" : "OFFLINE", clusterName); + return false; + } + + // 2. Check that both instances only have one session and are not carrying any over. + // count number of sessions under CurrentState folder. If it is carrying over from prv session, + // then there are > 1 session ZNodes. + List swapOutSessions = baseAccessor.getChildNames( + PropertyPathBuilder.instanceCurrentState(clusterName, swapOutInstanceName), 0); + List swapInSessions = baseAccessor.getChildNames( + PropertyPathBuilder.instanceCurrentState(clusterName, swapInInstanceName), 0); + if (swapOutSessions.size() > 1 || swapInSessions.size() > 1) { + logger.warn( + "SwapOutInstance {} is carrying over from prev session and SwapInInstance {} is carrying over from prev session for cluster {}." + + " Swap will not complete unless both instances have only one session.", + swapOutInstanceName, swapInInstanceName, clusterName); + return false; + } + + // 3. Check that the swapOutInstance has no pending messages. + List swapOutMessages = + accessor.getChildValues(keyBuilder.messages(swapOutInstanceName), true); + int swapOutPendingMessageCount = swapOutMessages != null ? swapOutMessages.size() : 0; + List swapInMessages = + accessor.getChildValues(keyBuilder.messages(swapInInstanceName), true); + int swapInPendingMessageCount = swapInMessages != null ? 
swapInMessages.size() : 0; + if (swapOutPendingMessageCount > 0 || swapInPendingMessageCount > 0) { + logger.warn( + "SwapOutInstance {} has {} pending messages and SwapInInstance {} has {} pending messages for cluster {}." + + " Swap will not complete unless both instances have no pending messages.", + swapOutInstanceName, swapOutPendingMessageCount, swapInInstanceName, + swapInPendingMessageCount, clusterName); + return false; + } + + // 4. Collect a list of all partitions that have a current state on swapOutInstance + String swapOutActiveSession = swapOutLiveInstance.getEphemeralOwner(); + String swapInActiveSession = swapInLiveInstance.getEphemeralOwner(); + + // Iterate over all resources with current states on the swapOutInstance + List swapOutResources = baseAccessor.getChildNames( + PropertyPathBuilder.instanceCurrentState(clusterName, swapOutInstanceName, + swapOutActiveSession), 0); + for (String swapOutResource : swapOutResources) { + // Get the topState and secondTopStates for the stateModelDef used by the resource. + IdealState idealState = accessor.getProperty(keyBuilder.idealStates(swapOutResource)); + StateModelDefinition stateModelDefinition = + accessor.getProperty(keyBuilder.stateModelDef(idealState.getStateModelDefRef())); + String topState = stateModelDefinition.getTopState(); + Set secondTopStates = stateModelDefinition.getSecondTopStates(); + + CurrentState swapOutResourceCurrentState = accessor.getProperty( + keyBuilder.currentState(swapOutInstanceName, swapOutActiveSession, swapOutResource)); + CurrentState swapInResourceCurrentState = accessor.getProperty( + keyBuilder.currentState(swapInInstanceName, swapInActiveSession, swapOutResource)); + + // Check to make sure swapInInstance has a current state for the resource + if (swapInResourceCurrentState == null) { + logger.warn( + "SwapOutInstance {} has current state for resource {} but SwapInInstance {} does not for cluster {}." 
+ + " Swap will not complete unless both instances have current states for all resources.", + swapOutInstanceName, swapOutResource, swapInInstanceName, clusterName); + return false; + } + + // Iterate over all partitions in the swapOutInstance's current state for the resource + // and ensure that the swapInInstance has the correct state for the partition. + for (String partitionName : swapOutResourceCurrentState.getPartitionStateMap().keySet()) { + String swapOutPartitionState = swapOutResourceCurrentState.getState(partitionName); + String swapInPartitionState = swapInResourceCurrentState.getState(partitionName); + + // Neither instance should have any partitions in ERROR state. + if (swapOutPartitionState.equals(HelixDefinedState.ERROR.name()) + || swapInPartitionState.equals(HelixDefinedState.ERROR.name())) { + logger.warn( + "SwapOutInstance {} has partition {} in state {} and SwapInInstance {} has partition {} in state {} for cluster {}." + + " Swap will not complete unless both instances have no partitions in ERROR state.", + swapOutInstanceName, partitionName, swapOutPartitionState, swapInInstanceName, + partitionName, swapInPartitionState, clusterName); + return false; + } + + // When the state of a partition on a swapOut instance is in the topState, the state + // of the partition on the swapInInstance should also be in the topState or a secondTopState. + // It should be in a topState only if the state model allows multiple replicas in the topState. + // In all other cases it should be a secondTopState. + if (swapOutPartitionState.equals(topState) && !(swapInPartitionState.equals(topState) + || secondTopStates.contains(swapInPartitionState))) { + logger.warn( + "SwapOutInstance {} has partition {} in topState {} but SwapInInstance {} has partition {} in state {} for cluster {}." 
+ + " Swap will not complete unless SwapInInstance has partition in topState or secondState.", + swapOutInstanceName, partitionName, swapOutPartitionState, swapInInstanceName, + partitionName, swapInPartitionState, clusterName); + return false; + } + + // When the state of a partition on a swapOut instance is any other state, except ERROR, DROPPED or TopState, + // the state of the partition on the swapInInstance should be the same. + if (!swapOutPartitionState.equals(topState) && !swapOutPartitionState.equals( + HelixDefinedState.DROPPED.name()) + && !swapOutPartitionState.equals(swapInPartitionState)) { + logger.warn( + "SwapOutInstance {} has partition {} in state {} but SwapInInstance {} has partition {} in state {} for cluster {}." + + " Swap will not complete unless both instances have matching states.", + swapOutInstanceName, partitionName, swapOutPartitionState, swapInInstanceName, + partitionName, swapInPartitionState, clusterName); + return false; + } + } + } + + return true; + } + + @Override + public boolean canCompleteSwap(String clusterName, String instanceName) { + InstanceConfig instanceConfig = getInstanceConfig(clusterName, instanceName); + if (instanceConfig == null) { + logger.warn( + "Instance {} in cluster {} does not exist. Cannot determine if the swap is complete.", + instanceName, clusterName); + return false; + } + + InstanceConfig swapOutInstanceConfig = instanceConfig.getInstanceOperation() + .equals(InstanceConstants.InstanceOperation.SWAP_OUT.name()) ? instanceConfig + : findMatchingSwapInstance(clusterName, instanceConfig); + InstanceConfig swapInInstanceConfig = instanceConfig.getInstanceOperation() + .equals(InstanceConstants.InstanceOperation.SWAP_IN.name()) ? instanceConfig + : findMatchingSwapInstance(clusterName, instanceConfig); + if (swapOutInstanceConfig == null || swapInInstanceConfig == null) { + logger.warn( + "Instance {} in cluster {} is not swapping with any other instance. 
Cannot determine if the swap is complete.", + instanceName, clusterName); + return false; + } + + // Check if the swap is ready to be completed. + return canCompleteSwap(clusterName, swapOutInstanceConfig.getInstanceName(), + swapInInstanceConfig.getInstanceName()); + } + + @Override + public boolean completeSwapIfPossible(String clusterName, String instanceName) { + InstanceConfig instanceConfig = getInstanceConfig(clusterName, instanceName); + if (instanceConfig == null) { + logger.warn( + "Instance {} in cluster {} does not exist. Cannot determine if the swap is complete.", + instanceName, clusterName); + return false; + } + + InstanceConfig swapOutInstanceConfig = instanceConfig.getInstanceOperation() + .equals(InstanceConstants.InstanceOperation.SWAP_OUT.name()) ? instanceConfig + : findMatchingSwapInstance(clusterName, instanceConfig); + InstanceConfig swapInInstanceConfig = instanceConfig.getInstanceOperation() + .equals(InstanceConstants.InstanceOperation.SWAP_IN.name()) ? instanceConfig + : findMatchingSwapInstance(clusterName, instanceConfig); + if (swapOutInstanceConfig == null || swapInInstanceConfig == null) { + logger.warn( + "Instance {} in cluster {} is not swapping with any other instance. Cannot determine if the swap is complete.", + instanceName, clusterName); + return false; + } + + // Check if the swap is ready to be completed. If not, return false. + if (!canCompleteSwap(clusterName, swapOutInstanceConfig.getInstanceName(), + swapInInstanceConfig.getInstanceName())) { + return false; + } + + // Complete the swap by removing the InstanceOperation for the SWAP_IN node and disabling the SWAP_OUT node. 
+ setInstanceOperation(clusterName, swapInInstanceConfig.getInstanceName(), null); + enableInstance(clusterName, swapOutInstanceConfig.getInstanceName(), false); + + return true; + } + @Override public boolean isReadyForPreparingJoiningCluster(String clusterName, String instanceName) { - if (!instanceHasCurrentSateOrMessage(clusterName, instanceName)) { + if (!instanceHasCurrentStateOrMessage(clusterName, instanceName)) { InstanceConfig config = getInstanceConfig(clusterName, instanceName); return config != null && DelayedAutoRebalancer.INSTANCE_OPERATION_TO_EXCLUDE_FROM_ASSIGNMENT.contains( config.getInstanceOperation()); @@ -434,7 +847,7 @@ public boolean isReadyForPreparingJoiningCluster(String clusterName, String inst * @param instanceName * @return */ - private boolean instanceHasCurrentSateOrMessage(String clusterName, String instanceName) { + private boolean instanceHasCurrentStateOrMessage(String clusterName, String instanceName) { HelixDataAccessor accessor = new ZKHelixDataAccessor(clusterName, new ZkBaseDataAccessor(_zkClient)); PropertyKey.Builder keyBuilder = accessor.keyBuilder(); diff --git a/helix-core/src/main/java/org/apache/helix/model/InstanceConfig.java b/helix-core/src/main/java/org/apache/helix/model/InstanceConfig.java index b68250dd37..98da7340f2 100644 --- a/helix-core/src/main/java/org/apache/helix/model/InstanceConfig.java +++ b/helix-core/src/main/java/org/apache/helix/model/InstanceConfig.java @@ -702,6 +702,18 @@ public String getInstanceName() { return _record.getId(); } + /** + * Get the logicalId of this instance. If it does not exist or is not set, + * return the instance name. + * @param logicalIdKey the key for the DOMAIN field containing the logicalId + * @return the logicalId of this instance + */ + public String getLogicalId(String logicalIdKey) { + // TODO: Consider caching DomainMap, parsing the DOMAIN string every time + // getLogicalId is called can become expensive if called too frequently. 
+ return getDomainAsMap().getOrDefault(logicalIdKey, getInstanceName()); + } + @Override public boolean isValid() { // HELIX-65: remove check for hostname/port existence @@ -772,6 +784,7 @@ public static class Builder { private int _weight = WEIGHT_NOT_SET; private List _tags = new ArrayList<>(); private boolean _instanceEnabled = HELIX_ENABLED_DEFAULT_VALUE; + private InstanceConstants.InstanceOperation _instanceOperation; private Map _instanceInfoMap; private Map _instanceCapacityMap; @@ -819,6 +832,10 @@ public InstanceConfig build(String instanceId) { instanceConfig.setInstanceEnabled(_instanceEnabled); } + if (_instanceOperation != null) { + instanceConfig.setInstanceOperation(_instanceOperation); + } + if (_instanceInfoMap != null) { instanceConfig.setInstanceInfoMap(_instanceInfoMap); } @@ -890,6 +907,17 @@ public Builder setInstanceEnabled(boolean instanceEnabled) { return this; } + /** + * Set the instance operation for this instance + * + * @param instanceOperation the instance operation. 
+ * @return InstanceConfig.Builder + */ + public Builder setInstanceOperation(InstanceConstants.InstanceOperation instanceOperation) { + _instanceOperation = instanceOperation; + return this; + } + /** * Set the INSTANCE_INFO_MAP for this instance * @param instanceInfoMap the instance info map diff --git a/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/TestWagedRebalancer.java b/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/TestWagedRebalancer.java index 608a4d3afe..000978ef1a 100644 --- a/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/TestWagedRebalancer.java +++ b/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/TestWagedRebalancer.java @@ -22,6 +22,7 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import java.io.IOException; +import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; @@ -738,7 +739,8 @@ public void testRebalanceOverwrite() throws HelixRebalanceException, IOException instances.add(offlineInstance); when(clusterData.getAllInstances()).thenReturn(instances); when(clusterData.getEnabledInstances()).thenReturn(instances); - when(clusterData.getEnabledLiveInstances()).thenReturn(ImmutableSet.of(instance0, instance1, instance2)); + when(clusterData.getEnabledLiveInstances()).thenReturn( + new HashSet<>(Arrays.asList(instance0, instance1, instance2))); Map instanceOfflineTimeMap = new HashMap<>(); instanceOfflineTimeMap.put(offlineInstance, System.currentTimeMillis() + Integer.MAX_VALUE); when(clusterData.getInstanceOfflineTimeMap()).thenReturn(instanceOfflineTimeMap); diff --git a/helix-core/src/test/java/org/apache/helix/integration/rebalancer/TestInstanceOperation.java b/helix-core/src/test/java/org/apache/helix/integration/rebalancer/TestInstanceOperation.java index 10cd662cb2..9ccc14fdfa 100644 --- 
a/helix-core/src/test/java/org/apache/helix/integration/rebalancer/TestInstanceOperation.java +++ b/helix-core/src/test/java/org/apache/helix/integration/rebalancer/TestInstanceOperation.java @@ -10,12 +10,14 @@ import java.util.Set; import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.Collectors; + +import com.google.common.collect.ImmutableSet; import org.apache.helix.ConfigAccessor; import org.apache.helix.HelixAdmin; import org.apache.helix.HelixDataAccessor; +import org.apache.helix.HelixException; import org.apache.helix.HelixRollbackException; import org.apache.helix.NotificationContext; -import org.apache.helix.PropertyPathBuilder; import org.apache.helix.TestHelper; import org.apache.helix.common.ZkTestBase; import org.apache.helix.constants.InstanceConstants; @@ -30,8 +32,10 @@ import org.apache.helix.model.ClusterConfig; import org.apache.helix.model.ExternalView; import org.apache.helix.model.IdealState; +import org.apache.helix.model.InstanceConfig; import org.apache.helix.model.Message; import org.apache.helix.model.ResourceAssignment; +import org.apache.helix.model.StateModelDefinition; import org.apache.helix.participant.StateMachineEngine; import org.apache.helix.participant.statemachine.StateModel; import org.apache.helix.participant.statemachine.StateModelFactory; @@ -51,15 +55,28 @@ public class TestInstanceOperation extends ZkTestBase { protected final String CLASS_NAME = getShortClassName(); protected final String CLUSTER_NAME = CLUSTER_PREFIX + "_" + CLASS_NAME; + private final String TEST_CAPACITY_KEY = "TestCapacityKey"; + private final int TEST_CAPACITY_VALUE = 100; + protected static final String ZONE = "zone"; + protected static final String HOST = "host"; + protected static final String LOGICAL_ID = "logicalId"; + protected static final String TOPOLOGY = String.format("%s/%s/%s", ZONE, HOST, LOGICAL_ID); + + protected static final ImmutableSet SECONDARY_STATE_SET = + ImmutableSet.of("SLAVE", "STANDBY"); + protected 
static final ImmutableSet ACCEPTABLE_STATE_SET = + ImmutableSet.of("MASTER", "LEADER", "SLAVE", "STANDBY"); private int REPLICA = 3; protected ClusterControllerManager _controller; List _participants = new ArrayList<>(); + private List _originalParticipantNames = new ArrayList<>(); List _participantNames = new ArrayList<>(); private Set _allDBs = new HashSet<>(); private ZkHelixClusterVerifier _clusterVerifier; private ConfigAccessor _configAccessor; private long _stateModelDelay = 3L; + private final long DEFAULT_RESOURCE_DELAY_TIME = 1800000L; private HelixAdmin _admin; protected AssignmentMetadataStore _assignmentMetadataStore; HelixDataAccessor _dataAccessor; @@ -72,6 +89,7 @@ public void beforeClass() throws Exception { for (int i = 0; i < NUM_NODE; i++) { String participantName = PARTICIPANT_PREFIX + "_" + (START_PORT + i); + _originalParticipantNames.add(participantName); addParticipant(participantName); } @@ -88,24 +106,88 @@ public void beforeClass() throws Exception { _configAccessor = new ConfigAccessor(_gZkClient); _dataAccessor = new ZKHelixDataAccessor(CLUSTER_NAME, _baseAccessor); + setupClusterConfig(); + + createTestDBs(DEFAULT_RESOURCE_DELAY_TIME); + + setUpWagedBaseline(); + + _admin = new ZKHelixAdmin(_gZkClient); + } + + private void setupClusterConfig() { + _stateModelDelay = 3L; ClusterConfig clusterConfig = _configAccessor.getClusterConfig(CLUSTER_NAME); clusterConfig.stateTransitionCancelEnabled(true); clusterConfig.setDelayRebalaceEnabled(true); clusterConfig.setRebalanceDelayTime(1800000L); _configAccessor.setClusterConfig(CLUSTER_NAME, clusterConfig); - createTestDBs(1800000L); + Assert.assertTrue(_clusterVerifier.verifyByPolling()); + } - setUpWagedBaseline(); + private void enabledTopologyAwareRebalance() { + ClusterConfig clusterConfig = _configAccessor.getClusterConfig(CLUSTER_NAME); + clusterConfig.setTopology(TOPOLOGY); + clusterConfig.setFaultZoneType(ZONE); + clusterConfig.setTopologyAwareEnabled(true); + 
_configAccessor.setClusterConfig(CLUSTER_NAME, clusterConfig); - _admin = new ZKHelixAdmin(_gZkClient); + Assert.assertTrue(_clusterVerifier.verifyByPolling()); + } + + private void disableTopologyAwareRebalance() { + ClusterConfig clusterConfig = _configAccessor.getClusterConfig(CLUSTER_NAME); + clusterConfig.setTopologyAwareEnabled(false); + clusterConfig.setTopology(null); + clusterConfig.setFaultZoneType(null); + _configAccessor.setClusterConfig(CLUSTER_NAME, clusterConfig); + + Assert.assertTrue(_clusterVerifier.verifyByPolling()); + } + + private void resetInstances() { + // Disable and drop any participants that are not in the original participant list. + Set droppedParticipants = new HashSet<>(); + for (int i = 0; i < _participants.size(); i++) { + String participantName = _participantNames.get(i); + if (!_originalParticipantNames.contains(participantName)) { + _participants.get(i).syncStop(); + _gSetupTool.getClusterManagementTool().enableInstance(CLUSTER_NAME, participantName, false); + _gSetupTool.getClusterManagementTool() + .dropInstance(CLUSTER_NAME, _gSetupTool.getClusterManagementTool().getInstanceConfig(CLUSTER_NAME, participantName)); + droppedParticipants.add(participantName); + } + } + + // Remove the dropped instance from _participants and _participantNames + _participantNames.removeIf(droppedParticipants::contains); + _participants.removeIf(p -> droppedParticipants.contains(p.getInstanceName())); + + for (int i = 0; i < _participants.size(); i++) { + // If instance is not connected to ZK, replace it + if (!_participants.get(i).isConnected()) { + // Drop bad instance from the cluster. 
+ _gSetupTool.getClusterManagementTool() + .dropInstance(CLUSTER_NAME, _gSetupTool.getClusterManagementTool().getInstanceConfig(CLUSTER_NAME, _participantNames.get(i))); + _participants.set(i, createParticipant(_participantNames.get(i), Integer.toString(i), + "zone_" + i, null, true, -1)); + _participants.get(i).syncStart(); + continue; + } + _gSetupTool.getClusterManagementTool() + .setInstanceOperation(CLUSTER_NAME, _participantNames.get(i), null); + _gSetupTool.getClusterManagementTool().enableInstance(CLUSTER_NAME, _participantNames.get(i), true); + } + + Assert.assertTrue(_clusterVerifier.verifyByPolling()); } @Test public void testEvacuate() throws Exception { System.out.println("START TestInstanceOperation.testEvacuate() at " + new Date(System.currentTimeMillis())); // EV should contain all participants, check resources one by one - Map assignment = getEV(); + Map assignment = getEVs(); for (String resource : _allDBs) { Assert.assertTrue(getParticipantsInEv(assignment.get(resource)).containsAll(_participantNames)); } @@ -118,7 +200,7 @@ public void testEvacuate() throws Exception { Assert.assertTrue(_clusterVerifier.verifyByPolling()); // New ev should contain all instances but the evacuated one - assignment = getEV(); + assignment = getEVs(); List currentActiveInstances = _participantNames.stream().filter(n -> !n.equals(instanceToEvacuate)).collect(Collectors.toList()); for (String resource : _allDBs) { @@ -143,7 +225,7 @@ public void testRevertEvacuation() throws Exception { Assert.assertTrue(_clusterVerifier.verifyByPolling()); // EV should contain all participants, check resources one by one - Map assignment = getEV(); + Map assignment = getEVs(); for (String resource : _allDBs) { Assert.assertTrue(getParticipantsInEv(assignment.get(resource)).containsAll(_participantNames)); validateAssignmentInEv(assignment.get(resource)); @@ -159,7 +241,7 @@ public void testAddingNodeWithEvacuationTag() throws Exception { .enableInstance(CLUSTER_NAME, mockNewInstance, 
false); Assert.assertTrue(_clusterVerifier.verifyByPolling()); //ev should contain all instances but the disabled one - Map assignment = getEV(); + Map assignment = getEVs(); List currentActiveInstances = _participantNames.stream().filter(n -> !n.equals(mockNewInstance)).collect(Collectors.toList()); for (String resource : _allDBs) { @@ -175,7 +257,7 @@ public void testAddingNodeWithEvacuationTag() throws Exception { _gSetupTool.getClusterManagementTool() .enableInstance(CLUSTER_NAME, mockNewInstance, true); //ev should be the same - assignment = getEV(); + assignment = getEVs(); currentActiveInstances = _participantNames.stream().filter(n -> !n.equals(mockNewInstance)).collect(Collectors.toList()); for (String resource : _allDBs) { @@ -193,7 +275,7 @@ public void testAddingNodeWithEvacuationTag() throws Exception { Assert.assertTrue(_clusterVerifier.verifyByPolling()); // EV should contain all participants, check resources one by one - assignment = getEV(); + assignment = getEVs(); for (String resource : _allDBs) { Assert.assertTrue(getParticipantsInEv(assignment.get(resource)).containsAll(_participantNames)); validateAssignmentInEv(assignment.get(resource)); @@ -234,7 +316,7 @@ public void testEvacuateAndCancelBeforeBootstrapFinish() throws Exception { // sleep a bit so ST messages can start executing Thread.sleep(Math.abs(_stateModelDelay / 100)); // before we cancel, check current EV - Map assignment = getEV(); + Map assignment = getEVs(); for (String resource : _allDBs) { // check every replica has >= 3 partitions and a top state partition validateAssignmentInEv(assignment.get(resource)); @@ -244,7 +326,7 @@ public void testEvacuateAndCancelBeforeBootstrapFinish() throws Exception { _gSetupTool.getClusterManagementTool() .setInstanceOperation(CLUSTER_NAME, instanceToEvacuate, null); - assignment = getEV(); + assignment = getEVs(); for (String resource : _allDBs) { // check every replica has >= 3 active replicas, even before cluster converge 
validateAssignmentInEv(assignment.get(resource)); @@ -254,7 +336,7 @@ public void testEvacuateAndCancelBeforeBootstrapFinish() throws Exception { Assert.assertTrue(_clusterVerifier.verifyByPolling()); // EV should contain all participants, check resources one by one - assignment = getEV(); + assignment = getEVs(); for (String resource : _allDBs) { Assert.assertTrue(getParticipantsInEv(assignment.get(resource)).containsAll(_participantNames)); // check every replica has >= 3 active replicas again @@ -283,7 +365,7 @@ public void testEvacuateAndCancelBeforeDropFinish() throws Exception { _gSetupTool.getClusterManagementTool() .setInstanceOperation(CLUSTER_NAME, instanceToEvacuate, null); // check every replica has >= 3 active replicas, even before cluster converge - Map assignment = getEV(); + Map assignment = getEVs(); for (String resource : _allDBs) { validateAssignmentInEv(assignment.get(resource)); } @@ -291,7 +373,7 @@ public void testEvacuateAndCancelBeforeDropFinish() throws Exception { Assert.assertTrue(_clusterVerifier.verifyByPolling()); // EV should contain all participants, check resources one by one - assignment = getEV(); + assignment = getEVs(); for (String resource : _allDBs) { Assert.assertTrue(getParticipantsInEv(assignment.get(resource)).containsAll(_participantNames)); // check every replica has >= 3 active replicas @@ -309,7 +391,7 @@ public void testMarkEvacuationAfterEMM() throws Exception { addParticipant(PARTICIPANT_PREFIX + "_" + (START_PORT + NUM_NODE)); Assert.assertTrue(_clusterVerifier.verifyByPolling()); - Map assignment = getEV(); + Map assignment = getEVs(); for (String resource : _allDBs) { Assert.assertFalse(getParticipantsInEv(assignment.get(resource)).contains(_participantNames.get(NUM_NODE))); } @@ -332,7 +414,7 @@ public void testMarkEvacuationAfterEMM() throws Exception { Assert.assertTrue(_clusterVerifier.verifyByPolling()); - assignment = getEV(); + assignment = getEVs(); List currentActiveInstances = 
_participantNames.stream().filter(n -> !n.equals(instanceToEvacuate)).collect(Collectors.toList()); for (String resource : _allDBs) { @@ -342,6 +424,8 @@ public void testMarkEvacuationAfterEMM() throws Exception { Assert.assertTrue(newPAssignedParticipants.containsAll(currentActiveInstances)); } Assert.assertTrue(_admin.isReadyForPreparingJoiningCluster(CLUSTER_NAME, instanceToEvacuate)); + + _stateModelDelay = 3L; } @Test(dependsOnMethods = "testMarkEvacuationAfterEMM") @@ -356,7 +440,7 @@ public void testEvacuationWithOfflineInstancesInCluster() throws Exception { Map assignment; // EV should contain all participants, check resources one by one - assignment = getEV(); + assignment = getEVs(); for (String resource : _allDBs) { TestHelper.verify(() -> { ExternalView ev = assignment.get(resource); @@ -379,13 +463,686 @@ public void testEvacuationWithOfflineInstancesInCluster() throws Exception { }, 30000); } - _participants.get(1).syncStart(); - _participants.get(2).syncStart(); + resetInstances(); + dropTestDBs(ImmutableSet.of("TEST_DB3_DELAYED_CRUSHED", "TEST_DB4_DELAYED_WAGED")); } + @Test(expectedExceptions = HelixException.class, dependsOnMethods = "testEvacuationWithOfflineInstancesInCluster") + public void testAddingNodeWithSwapOutInstanceOperation() throws Exception { + System.out.println( + "START TestInstanceOperation.testAddingNodeWithSwapOutInstanceOperation() at " + new Date( + System.currentTimeMillis())); + + enabledTopologyAwareRebalance(); + resetInstances(); + + // Set instance's InstanceOperation to SWAP_OUT + String instanceToSwapOutName = _participants.get(0).getInstanceName(); + InstanceConfig instanceToSwapOutInstanceConfig = _gSetupTool.getClusterManagementTool() + .getInstanceConfig(CLUSTER_NAME, instanceToSwapOutName); + _gSetupTool.getClusterManagementTool().setInstanceOperation(CLUSTER_NAME, instanceToSwapOutName, + InstanceConstants.InstanceOperation.SWAP_OUT); + + // Add instance with InstanceOperation set to SWAP_IN + String 
instanceToSwapInName = PARTICIPANT_PREFIX + "_" + (START_PORT + _participants.size()); + addParticipant(instanceToSwapInName, instanceToSwapOutInstanceConfig.getLogicalId(LOGICAL_ID), + instanceToSwapOutInstanceConfig.getDomainAsMap().get(ZONE), + InstanceConstants.InstanceOperation.SWAP_OUT, true, -1); + } - private void addParticipant(String participantName) { - _gSetupTool.addInstanceToCluster(CLUSTER_NAME, participantName); + @Test(expectedExceptions = HelixException.class, dependsOnMethods = "testAddingNodeWithSwapOutInstanceOperation") + public void testAddingNodeWithSwapOutNodeInstanceOperationUnset() throws Exception { + System.out.println( + "START TestInstanceOperation.testAddingNodeWithSwapOutNodeInstanceOperationUnset() at " + + new Date(System.currentTimeMillis())); + + resetInstances(); + + // Set instance's InstanceOperation to null + String instanceToSwapOutName = _participants.get(0).getInstanceName(); + InstanceConfig instanceToSwapOutInstanceConfig = _gSetupTool.getClusterManagementTool() + .getInstanceConfig(CLUSTER_NAME, instanceToSwapOutName); + _gSetupTool.getClusterManagementTool() + .setInstanceOperation(CLUSTER_NAME, instanceToSwapOutName, null); + + // Add instance with InstanceOperation set to SWAP_IN + String instanceToSwapInName = PARTICIPANT_PREFIX + "_" + (START_PORT + _participants.size()); + addParticipant(instanceToSwapInName, instanceToSwapOutInstanceConfig.getLogicalId(LOGICAL_ID), + instanceToSwapOutInstanceConfig.getDomainAsMap().get(ZONE), + InstanceConstants.InstanceOperation.SWAP_IN, true, -1); + } + + @Test(expectedExceptions = HelixException.class, dependsOnMethods = "testAddingNodeWithSwapOutNodeInstanceOperationUnset") + public void testNodeSwapWithNoSwapOutNode() throws Exception { + System.out.println("START TestInstanceOperation.testNodeSwapWithNoSwapOutNode() at " + new Date( + System.currentTimeMillis())); + + resetInstances(); + + // Add new instance with InstanceOperation set to SWAP_IN + String 
instanceToSwapInName = PARTICIPANT_PREFIX + "_" + (START_PORT + _participants.size()); + addParticipant(instanceToSwapInName, "1000", "zone_1000", + InstanceConstants.InstanceOperation.SWAP_IN, true, -1); + } + + @Test(dependsOnMethods = "testNodeSwapWithNoSwapOutNode") + public void testNodeSwapSwapInNodeNoInstanceOperationEnabled() throws Exception { + System.out.println( + "START TestInstanceOperation.testNodeSwapSwapInNodeNoInstanceOperationEnabled() at " + + new Date(System.currentTimeMillis())); + + resetInstances(); + + // Set instance's InstanceOperation to SWAP_OUT + String instanceToSwapOutName = _participants.get(0).getInstanceName(); + InstanceConfig instanceToSwapOutInstanceConfig = _gSetupTool.getClusterManagementTool() + .getInstanceConfig(CLUSTER_NAME, instanceToSwapOutName); + _gSetupTool.getClusterManagementTool().setInstanceOperation(CLUSTER_NAME, instanceToSwapOutName, + InstanceConstants.InstanceOperation.SWAP_OUT); + + // Add instance with same logicalId with InstanceOperation unset + // This should work because adding instance with InstanceOperation unset will automatically + // set the InstanceOperation to SWAP_IN. 
+ String instanceToSwapInName = PARTICIPANT_PREFIX + "_" + (START_PORT + _participants.size()); + addParticipant(instanceToSwapInName, instanceToSwapOutInstanceConfig.getLogicalId(LOGICAL_ID), + instanceToSwapOutInstanceConfig.getDomainAsMap().get(ZONE), null, true, -1); + } + + @Test(expectedExceptions = HelixException.class, dependsOnMethods = "testNodeSwapSwapInNodeNoInstanceOperationEnabled") + public void testNodeSwapSwapInNodeWithAlreadySwappingPair() throws Exception { + System.out.println( + "START TestInstanceOperation.testNodeSwapSwapInNodeWithAlreadySwappingPair() at " + + new Date(System.currentTimeMillis())); + + resetInstances(); + + // Set instance's InstanceOperation to SWAP_OUT + String instanceToSwapOutName = _participants.get(0).getInstanceName(); + InstanceConfig instanceToSwapOutInstanceConfig = _gSetupTool.getClusterManagementTool() + .getInstanceConfig(CLUSTER_NAME, instanceToSwapOutName); + _gSetupTool.getClusterManagementTool().setInstanceOperation(CLUSTER_NAME, instanceToSwapOutName, + InstanceConstants.InstanceOperation.SWAP_OUT); + + // Add instance with InstanceOperation set to SWAP_IN + String instanceToSwapInName = PARTICIPANT_PREFIX + "_" + (START_PORT + _participants.size()); + addParticipant(instanceToSwapInName, instanceToSwapOutInstanceConfig.getLogicalId(LOGICAL_ID), + instanceToSwapOutInstanceConfig.getDomainAsMap().get(ZONE), + InstanceConstants.InstanceOperation.SWAP_IN, true, -1); + + // Add another instance with InstanceOperation set to SWAP_IN with same logicalId as previously + // added SWAP_IN instance. 
+ String secondInstanceToSwapInName = + PARTICIPANT_PREFIX + "_" + (START_PORT + _participants.size()); + addParticipant(secondInstanceToSwapInName, + instanceToSwapOutInstanceConfig.getLogicalId(LOGICAL_ID), + instanceToSwapOutInstanceConfig.getDomainAsMap().get(ZONE), + InstanceConstants.InstanceOperation.SWAP_IN, true, -1); + } + + @Test(expectedExceptions = HelixException.class, dependsOnMethods = "testNodeSwapSwapInNodeWithAlreadySwappingPair") + public void testNodeSwapNoTopologySetup() throws Exception { + System.out.println("START TestInstanceOperation.testNodeSwapNoTopologySetup() at " + new Date( + System.currentTimeMillis())); + disableTopologyAwareRebalance(); + resetInstances(); + + // Set instance's InstanceOperation to SWAP_OUT + String instanceToSwapOutName = _participants.get(0).getInstanceName(); + _gSetupTool.getClusterManagementTool().setInstanceOperation(CLUSTER_NAME, instanceToSwapOutName, + InstanceConstants.InstanceOperation.SWAP_OUT); + + // Add instance with InstanceOperation set to SWAP_IN + // There should be an error that the logicalId does not have SWAP_OUT instance because, + // helix can't determine what topology key to use to get the logicalId if TOPOLOGY is not set. 
+ String instanceToSwapInName = PARTICIPANT_PREFIX + "_" + (START_PORT + _participants.size()); + InstanceConfig instanceToSwapOutInstanceConfig = _gSetupTool.getClusterManagementTool() + .getInstanceConfig(CLUSTER_NAME, instanceToSwapOutName); + addParticipant(instanceToSwapInName, instanceToSwapOutInstanceConfig.getLogicalId(LOGICAL_ID), + instanceToSwapOutInstanceConfig.getDomainAsMap().get(ZONE), + InstanceConstants.InstanceOperation.SWAP_IN, true, -1); + } + + @Test(expectedExceptions = HelixException.class, dependsOnMethods = "testNodeSwapNoTopologySetup") + public void testNodeSwapWrongFaultZone() throws Exception { + System.out.println("START TestInstanceOperation.testNodeSwapWrongFaultZone() at " + new Date( + System.currentTimeMillis())); + // Re-enable topology aware rebalancing and set TOPOLOGY. + enabledTopologyAwareRebalance(); + resetInstances(); + + // Set instance's InstanceOperation to SWAP_OUT + String instanceToSwapOutName = _participants.get(0).getInstanceName(); + _gSetupTool.getClusterManagementTool().setInstanceOperation(CLUSTER_NAME, instanceToSwapOutName, + InstanceConstants.InstanceOperation.SWAP_OUT); + + // Add instance with InstanceOperation set to SWAP_IN + // There should be an error because SWAP_IN instance must be in the same FAULT_ZONE as the SWAP_OUT instance. 
+ String instanceToSwapInName = PARTICIPANT_PREFIX + "_" + (START_PORT + _participants.size()); + InstanceConfig instanceToSwapOutInstanceConfig = _gSetupTool.getClusterManagementTool() + .getInstanceConfig(CLUSTER_NAME, instanceToSwapOutName); + addParticipant(instanceToSwapInName, instanceToSwapOutInstanceConfig.getLogicalId(LOGICAL_ID), + instanceToSwapOutInstanceConfig.getDomainAsMap().get(ZONE) + "1", + InstanceConstants.InstanceOperation.SWAP_IN, true, -1); + } + + @Test(expectedExceptions = HelixException.class, dependsOnMethods = "testNodeSwapWrongFaultZone") + public void testNodeSwapWrongCapacity() throws Exception { + System.out.println("START TestInstanceOperation.testNodeSwapWrongCapacity() at " + new Date( + System.currentTimeMillis())); + resetInstances(); + + // Set instance's InstanceOperation to SWAP_OUT + String instanceToSwapOutName = _participants.get(0).getInstanceName(); + _gSetupTool.getClusterManagementTool().setInstanceOperation(CLUSTER_NAME, instanceToSwapOutName, + InstanceConstants.InstanceOperation.SWAP_OUT); + + // Add instance with InstanceOperation set to SWAP_IN + // There should be an error because SWAP_IN instance must have same capacity as the SWAP_OUT node. 
+ String instanceToSwapInName = PARTICIPANT_PREFIX + "_" + (START_PORT + _participants.size()); + InstanceConfig instanceToSwapOutInstanceConfig = _gSetupTool.getClusterManagementTool() + .getInstanceConfig(CLUSTER_NAME, instanceToSwapOutName); + addParticipant(instanceToSwapInName, instanceToSwapOutInstanceConfig.getLogicalId(LOGICAL_ID), + instanceToSwapOutInstanceConfig.getDomainAsMap().get(ZONE), + InstanceConstants.InstanceOperation.SWAP_IN, true, TEST_CAPACITY_VALUE - 10); + } + + @Test(dependsOnMethods = "testNodeSwapWrongCapacity") + public void testNodeSwap() throws Exception { + System.out.println( + "START TestInstanceOperation.testNodeSwap() at " + new Date(System.currentTimeMillis())); + resetInstances(); + + // Store original EV + Map originalEVs = getEVs(); + + Map swapOutInstancesToSwapInInstances = new HashMap<>(); + + // Set instance's InstanceOperation to SWAP_OUT + String instanceToSwapOutName = _participants.get(0).getInstanceName(); + InstanceConfig instanceToSwapOutInstanceConfig = _gSetupTool.getClusterManagementTool() + .getInstanceConfig(CLUSTER_NAME, instanceToSwapOutName); + _gSetupTool.getClusterManagementTool().setInstanceOperation(CLUSTER_NAME, instanceToSwapOutName, + InstanceConstants.InstanceOperation.SWAP_OUT); + + // Validate that the assignment has not changed since setting the InstanceOperation to SWAP_OUT + Assert.assertTrue(_clusterVerifier.verifyByPolling()); + validateEVsCorrect(getEVs(), originalEVs, swapOutInstancesToSwapInInstances, + Collections.emptySet(), Collections.emptySet()); + + // Add instance with InstanceOperation set to SWAP_IN + String instanceToSwapInName = PARTICIPANT_PREFIX + "_" + (START_PORT + _participants.size()); + swapOutInstancesToSwapInInstances.put(instanceToSwapOutName, instanceToSwapInName); + addParticipant(instanceToSwapInName, instanceToSwapOutInstanceConfig.getLogicalId(LOGICAL_ID), + instanceToSwapOutInstanceConfig.getDomainAsMap().get(ZONE), + InstanceConstants.InstanceOperation.SWAP_IN, 
true, -1); + + // Validate that partitions on SWAP_OUT instance does not change after setting the InstanceOperation to SWAP_OUT + // and adding the SWAP_IN instance to the cluster. + // Check that the SWAP_IN instance has the same partitions as the SWAP_OUT instance + // but none of them are in a top state. + Assert.assertTrue(_clusterVerifier.verifyByPolling()); + validateEVsCorrect(getEVs(), originalEVs, swapOutInstancesToSwapInInstances, + Set.of(instanceToSwapInName), Collections.emptySet()); + + // Assert canSwapBeCompleted is true + Assert.assertTrue(_gSetupTool.getClusterManagementTool() + .canCompleteSwap(CLUSTER_NAME, instanceToSwapOutName)); + // Assert completeSwapIfPossible is true + Assert.assertTrue(_gSetupTool.getClusterManagementTool() + .completeSwapIfPossible(CLUSTER_NAME, instanceToSwapOutName)); + + Assert.assertTrue(_clusterVerifier.verifyByPolling()); + + // Assert that SWAP_OUT instance is disabled and has no partitions assigned to it. + Assert.assertFalse(_gSetupTool.getClusterManagementTool() + .getInstanceConfig(CLUSTER_NAME, instanceToSwapOutName).getInstanceEnabled()); + Assert.assertEquals(getPartitionsAndStatesOnInstance(getEVs(), instanceToSwapOutName).size(), + 0); + + // Validate that the SWAP_IN instance has the same partitions the SWAP_OUT instance had before + // swap was completed. 
+ validateEVsCorrect(getEVs(), originalEVs, swapOutInstancesToSwapInInstances, + Collections.emptySet(), Set.of(instanceToSwapInName)); + } + + @Test(dependsOnMethods = "testNodeSwap") + public void testNodeSwapSwapInNodeNoInstanceOperationDisabled() throws Exception { + System.out.println( + "START TestInstanceOperation.testNodeSwapSwapInNodeNoInstanceOperationDisabled() at " + + new Date(System.currentTimeMillis())); + + resetInstances(); + + // Store original EVs + Map originalEVs = getEVs(); + + Map swapOutInstancesToSwapInInstances = new HashMap<>(); + + // Set instance's InstanceOperation to SWAP_OUT + String instanceToSwapOutName = _participants.get(0).getInstanceName(); + InstanceConfig instanceToSwapOutInstanceConfig = _gSetupTool.getClusterManagementTool() + .getInstanceConfig(CLUSTER_NAME, instanceToSwapOutName); + _gSetupTool.getClusterManagementTool().setInstanceOperation(CLUSTER_NAME, instanceToSwapOutName, + InstanceConstants.InstanceOperation.SWAP_OUT); + + // Validate that the assignment has not changed since setting the InstanceOperation to SWAP_OUT + Assert.assertTrue(_clusterVerifier.verifyByPolling()); + validateEVsCorrect(getEVs(), originalEVs, swapOutInstancesToSwapInInstances, + Collections.emptySet(), Collections.emptySet()); + + // Add instance with InstanceOperation unset, should automatically be set to SWAP_IN + String instanceToSwapInName = PARTICIPANT_PREFIX + "_" + (START_PORT + _participants.size()); + swapOutInstancesToSwapInInstances.put(instanceToSwapOutName, instanceToSwapInName); + addParticipant(instanceToSwapInName, instanceToSwapOutInstanceConfig.getLogicalId(LOGICAL_ID), + instanceToSwapOutInstanceConfig.getDomainAsMap().get(ZONE), null, false, -1); + + Assert.assertTrue(_clusterVerifier.verifyByPolling()); + validateEVsCorrect(getEVs(), originalEVs, swapOutInstancesToSwapInInstances, + Collections.emptySet(), Collections.emptySet()); + + // Enable the SWAP_IN instance, so it can start being assigned replicas + 
_gSetupTool.getClusterManagementTool().enableInstance(CLUSTER_NAME, instanceToSwapInName, true); + + // Validate that partitions on SWAP_OUT instance does not change after setting the InstanceOperation to SWAP_OUT + // and adding the SWAP_IN instance to the cluster. + // Check that the SWAP_IN instance has the same partitions as the SWAP_OUT instance + // but none of them are in a top state. + Assert.assertTrue(_clusterVerifier.verifyByPolling()); + validateEVsCorrect(getEVs(), originalEVs, swapOutInstancesToSwapInInstances, + Set.of(instanceToSwapInName), Collections.emptySet()); + + // Assert canSwapBeCompleted is true + Assert.assertTrue(_gSetupTool.getClusterManagementTool() + .canCompleteSwap(CLUSTER_NAME, instanceToSwapOutName)); + // Assert completeSwapIfPossible is true + Assert.assertTrue(_gSetupTool.getClusterManagementTool() + .completeSwapIfPossible(CLUSTER_NAME, instanceToSwapOutName)); + + Assert.assertTrue(_clusterVerifier.verifyByPolling()); + + // Assert that SWAP_OUT instance is disabled and has no partitions assigned to it. + Assert.assertFalse(_gSetupTool.getClusterManagementTool() + .getInstanceConfig(CLUSTER_NAME, instanceToSwapOutName).getInstanceEnabled()); + Assert.assertEquals(getPartitionsAndStatesOnInstance(getEVs(), instanceToSwapOutName).size(), + 0); + + // Validate that the SWAP_IN instance has the same partitions the SWAP_OUT instance had before + // swap was completed. 
+ validateEVsCorrect(getEVs(), originalEVs, swapOutInstancesToSwapInInstances, + Collections.emptySet(), Set.of(instanceToSwapInName)); + } + + @Test(dependsOnMethods = "testNodeSwapSwapInNodeNoInstanceOperationDisabled") + public void testNodeSwapCancelSwapWhenReadyToComplete() throws Exception { + System.out.println( + "START TestInstanceOperation.testNodeSwapCancelSwapWhenReadyToComplete() at " + new Date( + System.currentTimeMillis())); + + resetInstances(); + + // Store original EVs + Map originalEVs = getEVs(); + + Map swapOutInstancesToSwapInInstances = new HashMap<>(); + + // Set instance's InstanceOperation to SWAP_OUT + String instanceToSwapOutName = _participants.get(0).getInstanceName(); + InstanceConfig instanceToSwapOutInstanceConfig = _gSetupTool.getClusterManagementTool() + .getInstanceConfig(CLUSTER_NAME, instanceToSwapOutName); + _gSetupTool.getClusterManagementTool().setInstanceOperation(CLUSTER_NAME, instanceToSwapOutName, + InstanceConstants.InstanceOperation.SWAP_OUT); + + // Validate that the assignment has not changed since setting the InstanceOperation to SWAP_OUT + Assert.assertTrue(_clusterVerifier.verifyByPolling()); + validateEVsCorrect(getEVs(), originalEVs, swapOutInstancesToSwapInInstances, + Collections.emptySet(), Collections.emptySet()); + + // Add instance with InstanceOperation set to SWAP_IN + String instanceToSwapInName = PARTICIPANT_PREFIX + "_" + (START_PORT + _participants.size()); + swapOutInstancesToSwapInInstances.put(instanceToSwapOutName, instanceToSwapInName); + addParticipant(instanceToSwapInName, instanceToSwapOutInstanceConfig.getLogicalId(LOGICAL_ID), + instanceToSwapOutInstanceConfig.getDomainAsMap().get(ZONE), + InstanceConstants.InstanceOperation.SWAP_IN, true, -1); + + // Validate that partitions on SWAP_OUT instance does not change after setting the InstanceOperation to SWAP_OUT + // and adding the SWAP_IN instance to the cluster. 
+ // Check that the SWAP_IN instance has the same partitions as the SWAP_OUT instance + // but none of them are in a top state. + Assert.assertTrue(_clusterVerifier.verifyByPolling()); + validateEVsCorrect(getEVs(), originalEVs, swapOutInstancesToSwapInInstances, + Set.of(instanceToSwapInName), Collections.emptySet()); + + // Assert canSwapBeCompleted is true + Assert.assertTrue(_gSetupTool.getClusterManagementTool() + .canCompleteSwap(CLUSTER_NAME, instanceToSwapOutName)); + + // Cancel SWAP by disabling the SWAP_IN instance and remove SWAP_OUT InstanceOperation from SWAP_OUT instance. + _gSetupTool.getClusterManagementTool() + .enableInstance(CLUSTER_NAME, instanceToSwapInName, false); + + // Wait for cluster to converge. + Assert.assertTrue(_clusterVerifier.verifyByPolling()); + + // Validate there are no partitions on the SWAP_IN instance. + Assert.assertEquals(getPartitionsAndStatesOnInstance(getEVs(), instanceToSwapInName).size(), 0); + + // Validate that the SWAP_OUT instance has the same partitions as it had before. + validateEVsCorrect(getEVs(), originalEVs, swapOutInstancesToSwapInInstances, + Collections.emptySet(), Collections.emptySet()); + + _gSetupTool.getClusterManagementTool() + .setInstanceOperation(CLUSTER_NAME, instanceToSwapOutName, null); + + Assert.assertTrue(_clusterVerifier.verifyByPolling()); + + // Validate there are no partitions on the SWAP_IN instance. + Assert.assertEquals(getPartitionsAndStatesOnInstance(getEVs(), instanceToSwapInName).size(), 0); + + // Validate that the SWAP_OUT instance has the same partitions as it had before. 
+ validateEVsCorrect(getEVs(), originalEVs, swapOutInstancesToSwapInInstances, + Collections.emptySet(), Collections.emptySet()); + } + + @Test(dependsOnMethods = "testNodeSwapCancelSwapWhenReadyToComplete") + public void testNodeSwapAfterEMM() throws Exception { + System.out.println("START TestInstanceOperation.testNodeSwapAfterEMM() at " + new Date( + System.currentTimeMillis())); + + resetInstances(); + + // Store original EVs + Map originalEVs = getEVs(); + + Map swapOutInstancesToSwapInInstances = new HashMap<>(); + + // Put the cluster in maintenance mode. + _gSetupTool.getClusterManagementTool() + .manuallyEnableMaintenanceMode(CLUSTER_NAME, true, null, null); + + // Set instance's InstanceOperation to SWAP_OUT + String instanceToSwapOutName = _participants.get(0).getInstanceName(); + InstanceConfig instanceToSwapOutInstanceConfig = _gSetupTool.getClusterManagementTool() + .getInstanceConfig(CLUSTER_NAME, instanceToSwapOutName); + _gSetupTool.getClusterManagementTool().setInstanceOperation(CLUSTER_NAME, instanceToSwapOutName, + InstanceConstants.InstanceOperation.SWAP_OUT); + + // Validate that the assignment has not changed since setting the InstanceOperation to SWAP_OUT + Assert.assertTrue(_clusterVerifier.verifyByPolling()); + validateEVsCorrect(getEVs(), originalEVs, swapOutInstancesToSwapInInstances, + Collections.emptySet(), Collections.emptySet()); + + // Add instance with InstanceOperation set to SWAP_IN + String instanceToSwapInName = PARTICIPANT_PREFIX + "_" + (START_PORT + _participants.size()); + swapOutInstancesToSwapInInstances.put(instanceToSwapOutName, instanceToSwapInName); + addParticipant(instanceToSwapInName, instanceToSwapOutInstanceConfig.getLogicalId(LOGICAL_ID), + instanceToSwapOutInstanceConfig.getDomainAsMap().get(ZONE), + InstanceConstants.InstanceOperation.SWAP_IN, true, -1); + + // Validate that the assignment has not changed since adding the SWAP_IN node. + // During MM, the cluster should not compute new assignment. 
+ Assert.assertTrue(_clusterVerifier.verifyByPolling()); + validateEVsCorrect(getEVs(), originalEVs, swapOutInstancesToSwapInInstances, + Collections.emptySet(), Collections.emptySet()); + + // Remove the cluster from maintenance mode. + // Now swapping will begin + _gSetupTool.getClusterManagementTool() + .manuallyEnableMaintenanceMode(CLUSTER_NAME, false, null, null); + + // Validate that partitions on SWAP_OUT instance does not change after exiting MM + // Check that the SWAP_IN instance has the same partitions as the SWAP_OUT instance + // but none of them are in a top state. + Assert.assertTrue(_clusterVerifier.verifyByPolling()); + validateEVsCorrect(getEVs(), originalEVs, swapOutInstancesToSwapInInstances, + Set.of(instanceToSwapInName), Collections.emptySet()); + + // Assert canSwapBeCompleted is true + Assert.assertTrue(_gSetupTool.getClusterManagementTool() + .canCompleteSwap(CLUSTER_NAME, instanceToSwapOutName)); + // Assert completeSwapIfPossible is true + Assert.assertTrue(_gSetupTool.getClusterManagementTool() + .completeSwapIfPossible(CLUSTER_NAME, instanceToSwapOutName)); + + Assert.assertTrue(_clusterVerifier.verifyByPolling()); + + // Assert that SWAP_OUT instance is disabled and has no partitions assigned to it. + Assert.assertFalse(_gSetupTool.getClusterManagementTool() + .getInstanceConfig(CLUSTER_NAME, instanceToSwapOutName).getInstanceEnabled()); + Assert.assertEquals(getPartitionsAndStatesOnInstance(getEVs(), instanceToSwapOutName).size(), + 0); + + // Validate that the SWAP_IN instance has the same partitions the SWAP_OUT instance had before + // swap was completed. 
+ validateEVsCorrect(getEVs(), originalEVs, swapOutInstancesToSwapInInstances, + Collections.emptySet(), Set.of(instanceToSwapInName)); + } + + @Test(dependsOnMethods = "testNodeSwapAfterEMM") + public void testNodeSwapWithSwapOutInstanceDisabled() throws Exception { + System.out.println( + "START TestInstanceOperation.testNodeSwapWithSwapOutInstanceDisabled() at " + new Date( + System.currentTimeMillis())); + + resetInstances(); + + // Store original EVs + Map originalEVs = getEVs(); + + // Set instance's InstanceOperation to SWAP_OUT + String instanceToSwapOutName = _participants.get(0).getInstanceName(); + InstanceConfig instanceToSwapOutInstanceConfig = _gSetupTool.getClusterManagementTool() + .getInstanceConfig(CLUSTER_NAME, instanceToSwapOutName); + _gSetupTool.getClusterManagementTool().setInstanceOperation(CLUSTER_NAME, instanceToSwapOutName, + InstanceConstants.InstanceOperation.SWAP_OUT); + + Set swapOutInstanceOriginalPartitions = + getPartitionsAndStatesOnInstance(originalEVs, instanceToSwapOutName).keySet(); + + // Disable the SWAP_OUT instance. 
+ _gSetupTool.getClusterManagementTool() + .enableInstance(CLUSTER_NAME, instanceToSwapOutName, false); + + Assert.assertTrue(_clusterVerifier.verifyByPolling()); + + // Validate that the SWAP_OUT instance has all partitions in OFFLINE state + Set swapOutInstanceOfflineStates = + new HashSet<>(getPartitionsAndStatesOnInstance(getEVs(), instanceToSwapOutName).values()); + Assert.assertEquals(swapOutInstanceOfflineStates.size(), 1); + Assert.assertTrue(swapOutInstanceOfflineStates.contains("OFFLINE")); + + // Add instance with InstanceOperation set to SWAP_IN + String instanceToSwapInName = PARTICIPANT_PREFIX + "_" + (START_PORT + _participants.size()); + addParticipant(instanceToSwapInName, instanceToSwapOutInstanceConfig.getLogicalId(LOGICAL_ID), + instanceToSwapOutInstanceConfig.getDomainAsMap().get(ZONE), + InstanceConstants.InstanceOperation.SWAP_IN, true, -1); + + Assert.assertTrue(_clusterVerifier.verifyByPolling()); + + // Validate that the SWAP_IN instance has the same partitions as the SWAP_OUT instance in second top state. + Map swapInInstancePartitionsAndStates = + getPartitionsAndStatesOnInstance(getEVs(), instanceToSwapInName); + Assert.assertTrue( + swapInInstancePartitionsAndStates.keySet().containsAll(swapOutInstanceOriginalPartitions)); + Set swapInInstanceStates = new HashSet<>(swapInInstancePartitionsAndStates.values()); + swapInInstanceStates.removeAll(SECONDARY_STATE_SET); + Assert.assertEquals(swapInInstanceStates.size(), 0); + + // Assert canSwapBeCompleted is false because SWAP_OUT instance is disabled. + Assert.assertFalse(_gSetupTool.getClusterManagementTool() + .canCompleteSwap(CLUSTER_NAME, instanceToSwapOutName)); + + // Enable the SWAP_OUT instance. 
+ _gSetupTool.getClusterManagementTool() + .enableInstance(CLUSTER_NAME, instanceToSwapOutName, true); + + Assert.assertTrue(_clusterVerifier.verifyByPolling()); + + // Assert completeSwapIfPossible is true + Assert.assertTrue(_gSetupTool.getClusterManagementTool() + .completeSwapIfPossible(CLUSTER_NAME, instanceToSwapOutName)); + + // Validate that the SWAP_IN instance has the same partitions the SWAP_OUT instance originally + // had. Validate they are in second top state because initially disabling SWAP_OUT instance + // caused all topStates to be handed off to next replica in the preference list. + swapInInstancePartitionsAndStates = + getPartitionsAndStatesOnInstance(getEVs(), instanceToSwapInName); + Assert.assertTrue( + swapInInstancePartitionsAndStates.keySet().containsAll(swapOutInstanceOriginalPartitions)); + swapInInstanceStates = new HashSet<>(swapInInstancePartitionsAndStates.values()); + swapInInstanceStates.removeAll(SECONDARY_STATE_SET); + Assert.assertEquals(swapInInstanceStates.size(), 0); + + Assert.assertTrue(_clusterVerifier.verifyByPolling()); + + // Assert that SWAP_OUT instance is disabled and has no partitions assigned to it. + Assert.assertFalse(_gSetupTool.getClusterManagementTool() + .getInstanceConfig(CLUSTER_NAME, instanceToSwapOutName).getInstanceEnabled()); + Assert.assertEquals(getPartitionsAndStatesOnInstance(getEVs(), instanceToSwapOutName).size(), + 0); + } + + @Test(expectedExceptions = HelixException.class, dependsOnMethods = "testNodeSwapWithSwapOutInstanceDisabled") + public void testNodeSwapAddSwapInFirstEnabledBeforeSwapOutSet() { + System.out.println( + "START TestInstanceOperation.testNodeSwapAddSwapInFirstEnabledBeforeSwapOutSet() at " + + new Date(System.currentTimeMillis())); + resetInstances(); + + // Get the SWAP_OUT instance. 
+ String instanceToSwapOutName = _participants.get(0).getInstanceName(); + InstanceConfig instanceToSwapOutInstanceConfig = _gSetupTool.getClusterManagementTool() + .getInstanceConfig(CLUSTER_NAME, instanceToSwapOutName); + + // Add instance with InstanceOperation set to SWAP_IN enabled before setting SWAP_OUT instance. + String instanceToSwapInName = PARTICIPANT_PREFIX + "_" + (START_PORT + _participants.size()); + addParticipant(instanceToSwapInName, instanceToSwapOutInstanceConfig.getLogicalId(LOGICAL_ID), + instanceToSwapOutInstanceConfig.getDomainAsMap().get(ZONE), null, true, -1); + } + + @Test(expectedExceptions = HelixException.class, dependsOnMethods = "testNodeSwapAddSwapInFirstEnabledBeforeSwapOutSet") + public void testNodeSwapAddSwapInFirstEnableBeforeSwapOutSet() { + System.out.println( + "START TestInstanceOperation.testNodeSwapAddSwapInFirstEnableBeforeSwapOutSet() at " + + new Date(System.currentTimeMillis())); + resetInstances(); + + // Get the SWAP_OUT instance. + String instanceToSwapOutName = _participants.get(0).getInstanceName(); + InstanceConfig instanceToSwapOutInstanceConfig = _gSetupTool.getClusterManagementTool() + .getInstanceConfig(CLUSTER_NAME, instanceToSwapOutName); + + // Add instance with InstanceOperation set to SWAP_IN + String instanceToSwapInName = PARTICIPANT_PREFIX + "_" + (START_PORT + _participants.size()); + addParticipant(instanceToSwapInName, instanceToSwapOutInstanceConfig.getLogicalId(LOGICAL_ID), + instanceToSwapOutInstanceConfig.getDomainAsMap().get(ZONE), null, false, -1); + + Assert.assertTrue(_clusterVerifier.verifyByPolling()); + + // Enable the SWAP_IN instance before we have set the SWAP_OUT instance. 
+ _gSetupTool.getClusterManagementTool().enableInstance(CLUSTER_NAME, instanceToSwapInName, true); + } + + @Test(expectedExceptions = HelixException.class, dependsOnMethods = "testNodeSwapAddSwapInFirstEnableBeforeSwapOutSet") + public void testUnsetInstanceOperationOnSwapInWhenAlreadyUnsetOnSwapOut() { + System.out.println( + "START TestInstanceOperation.testUnsetInstanceOperationOnSwapInWhenAlreadyUnsetOnSwapOut() at " + + new Date(System.currentTimeMillis())); + resetInstances(); + + // Get the SWAP_OUT instance. + String instanceToSwapOutName = _participants.get(0).getInstanceName(); + InstanceConfig instanceToSwapOutInstanceConfig = _gSetupTool.getClusterManagementTool() + .getInstanceConfig(CLUSTER_NAME, instanceToSwapOutName); + + // Add instance with InstanceOperation set to SWAP_IN + String instanceToSwapInName = PARTICIPANT_PREFIX + "_" + (START_PORT + _participants.size()); + addParticipant(instanceToSwapInName, instanceToSwapOutInstanceConfig.getLogicalId(LOGICAL_ID), + instanceToSwapOutInstanceConfig.getDomainAsMap().get(ZONE), null, false, -1); + + Assert.assertTrue(_clusterVerifier.verifyByPolling()); + + // Try to remove the InstanceOperation from the SWAP_IN instance before the SWAP_OUT instance is set. + // This should throw exception because we cannot ever have two instances with the same logicalId and both have InstanceOperation + // unset. + _gSetupTool.getClusterManagementTool() + .setInstanceOperation(CLUSTER_NAME, instanceToSwapInName, null); + } + + @Test(dependsOnMethods = "testUnsetInstanceOperationOnSwapInWhenAlreadyUnsetOnSwapOut") + public void testNodeSwapAddSwapInFirst() { + System.out.println("START TestInstanceOperation.testNodeSwapAddSwapInFirst() at " + new Date( + System.currentTimeMillis())); + resetInstances(); + + // Store original EV + Map originalEVs = getEVs(); + + Map swapOutInstancesToSwapInInstances = new HashMap<>(); + + // Get the SWAP_OUT instance. 
+ String instanceToSwapOutName = _participants.get(0).getInstanceName(); + InstanceConfig instanceToSwapOutInstanceConfig = _gSetupTool.getClusterManagementTool() + .getInstanceConfig(CLUSTER_NAME, instanceToSwapOutName); + + // Add instance with InstanceOperation set to SWAP_IN + String instanceToSwapInName = PARTICIPANT_PREFIX + "_" + (START_PORT + _participants.size()); + swapOutInstancesToSwapInInstances.put(instanceToSwapOutName, instanceToSwapInName); + addParticipant(instanceToSwapInName, instanceToSwapOutInstanceConfig.getLogicalId(LOGICAL_ID), + instanceToSwapOutInstanceConfig.getDomainAsMap().get(ZONE), null, false, -1); + + // Validate that the assignment has not changed since setting the InstanceOperation to SWAP_OUT + Assert.assertTrue(_clusterVerifier.verifyByPolling()); + validateEVsCorrect(getEVs(), originalEVs, swapOutInstancesToSwapInInstances, + Collections.emptySet(), Collections.emptySet()); + + // After the SWAP_IN instance is added, we set the InstanceOperation to SWAP_OUT + _gSetupTool.getClusterManagementTool().setInstanceOperation(CLUSTER_NAME, instanceToSwapOutName, + InstanceConstants.InstanceOperation.SWAP_OUT); + + Assert.assertTrue(_clusterVerifier.verifyByPolling()); + + // Enable the SWAP_IN instance to begin the swap operation. + _gSetupTool.getClusterManagementTool().enableInstance(CLUSTER_NAME, instanceToSwapInName, true); + + // Validate that partitions on SWAP_OUT instance does not change after setting the InstanceOperation to SWAP_OUT + // and adding the SWAP_IN instance to the cluster. + // Check that the SWAP_IN instance has the same partitions as the SWAP_OUT instance + // but none of them are in a top state. 
+ Assert.assertTrue(_clusterVerifier.verifyByPolling()); + validateEVsCorrect(getEVs(), originalEVs, swapOutInstancesToSwapInInstances, + Set.of(instanceToSwapInName), Collections.emptySet()); + + // Assert canSwapBeCompleted is true + Assert.assertTrue(_gSetupTool.getClusterManagementTool() + .canCompleteSwap(CLUSTER_NAME, instanceToSwapOutName)); + // Assert completeSwapIfPossible is true + Assert.assertTrue(_gSetupTool.getClusterManagementTool() + .completeSwapIfPossible(CLUSTER_NAME, instanceToSwapOutName)); + + Assert.assertTrue(_clusterVerifier.verifyByPolling()); + + // Assert that SWAP_OUT instance is disabled and has no partitions assigned to it. + Assert.assertFalse(_gSetupTool.getClusterManagementTool() + .getInstanceConfig(CLUSTER_NAME, instanceToSwapOutName).getInstanceEnabled()); + Assert.assertEquals(getPartitionsAndStatesOnInstance(getEVs(), instanceToSwapOutName).size(), + 0); + + // Validate that the SWAP_IN instance has the same partitions the SWAP_OUT instance had before + // swap was completed. 
+ validateEVsCorrect(getEVs(), originalEVs, swapOutInstancesToSwapInInstances, + Collections.emptySet(), Set.of(instanceToSwapInName)); + } + + private MockParticipantManager createParticipant(String participantName, String logicalId, String zone, + InstanceConstants.InstanceOperation instanceOperation, boolean enabled, int capacity) { + InstanceConfig config = new InstanceConfig.Builder().setDomain( + String.format("%s=%s, %s=%s, %s=%s", ZONE, zone, HOST, participantName, LOGICAL_ID, + logicalId)).setInstanceEnabled(enabled).setInstanceOperation(instanceOperation) + .build(participantName); + if (capacity >= 0) { + config.setInstanceCapacityMap(Map.of(TEST_CAPACITY_KEY, capacity)); + } + _gSetupTool.getClusterManagementTool().addInstance(CLUSTER_NAME, config); // start dummy participants MockParticipantManager participant = new MockParticipantManager(ZK_ADDR, CLUSTER_NAME, participantName); @@ -393,12 +1150,24 @@ private void addParticipant(String participantName) { // Using a delayed state model StDelayMSStateModelFactory delayFactory = new StDelayMSStateModelFactory(); stateMachine.registerStateModelFactory("MasterSlave", delayFactory); + return participant; + } + + private void addParticipant(String participantName, String logicalId, String zone, + InstanceConstants.InstanceOperation instanceOperation, boolean enabled, int capacity) { + MockParticipantManager participant = createParticipant(participantName, logicalId, zone, + instanceOperation, enabled, capacity); participant.syncStart(); _participants.add(participant); _participantNames.add(participantName); } + private void addParticipant(String participantName) { + addParticipant(participantName, Integer.toString(_participants.size()), + "zone_" + _participants.size(), null, true, -1); + } + private void createTestDBs(long delayTime) throws InterruptedException { createResourceWithDelayedRebalance(CLUSTER_NAME, "TEST_DB0_CRUSHED", BuiltInStateModelDefinitions.LeaderStandby.name(), PARTITIONS, REPLICA, 
REPLICA - 1, -1, @@ -415,7 +1184,15 @@ private void createTestDBs(long delayTime) throws InterruptedException { Assert.assertTrue(_clusterVerifier.verifyByPolling()); } - private Map getEV() { + private void dropTestDBs(Set dbs) { + for (String db : dbs) { + _gSetupTool.getClusterManagementTool().dropResource(CLUSTER_NAME, db); + _allDBs.remove(db); + } + Assert.assertTrue(_clusterVerifier.verifyByPolling()); + } + + private Map getEVs() { Map externalViews = new HashMap(); for (String db : _allDBs) { ExternalView ev = _gSetupTool.getClusterManagementTool().getResourceExternalView(CLUSTER_NAME, db); @@ -450,7 +1227,65 @@ private Set getParticipantsInEv(ExternalView ev) { return assignedParticipants; } - // verify that each partition has >=REPLICA (3 in this case) replicas + private Map getPartitionsAndStatesOnInstance(Map evs, + String instanceName) { + Map instancePartitions = new HashMap<>(); + for (String resourceEV : evs.keySet()) { + for (String partition : evs.get(resourceEV).getPartitionSet()) { + if (evs.get(resourceEV).getStateMap(partition).containsKey(instanceName)) { + instancePartitions.put(partition, + evs.get(resourceEV).getStateMap(partition).get(instanceName)); + } + } + } + + return instancePartitions; + } + + private void validateEVCorrect(ExternalView actual, ExternalView original, + Map swapOutInstancesToSwapInInstances, Set inFlightSwapInInstances, + Set completedSwapInInstanceNames) { + Assert.assertEquals(actual.getPartitionSet(), original.getPartitionSet()); + IdealState is = _gSetupTool.getClusterManagementTool() + .getResourceIdealState(CLUSTER_NAME, original.getResourceName()); + StateModelDefinition stateModelDef = _gSetupTool.getClusterManagementTool() + .getStateModelDef(CLUSTER_NAME, is.getStateModelDefRef()); + for (String partition : actual.getPartitionSet()) { + Map expectedStateMap = new HashMap<>(original.getStateMap(partition)); + for (String swapOutInstance : swapOutInstancesToSwapInInstances.keySet()) { + if 
(expectedStateMap.containsKey(swapOutInstance) && inFlightSwapInInstances.contains( + swapOutInstancesToSwapInInstances.get(swapOutInstance))) { + // If the corresponding swapInInstance is in-flight, add it to the expectedStateMap + // with the same state as the swapOutInstance or secondState if the swapOutInstance + // has a topState. + expectedStateMap.put(swapOutInstancesToSwapInInstances.get(swapOutInstance), + expectedStateMap.get(swapOutInstance).equals(stateModelDef.getTopState()) + ? (String) stateModelDef.getSecondTopStates().toArray()[0] + : expectedStateMap.get(swapOutInstance)); + } else if (expectedStateMap.containsKey(swapOutInstance) + && completedSwapInInstanceNames.contains( + swapOutInstancesToSwapInInstances.get(swapOutInstance))) { + // If the corresponding swapInInstance is completed, add it to the expectedStateMap + // with the same state as the swapOutInstance. + expectedStateMap.put(swapOutInstancesToSwapInInstances.get(swapOutInstance), + expectedStateMap.get(swapOutInstance)); + expectedStateMap.remove(swapOutInstance); + } + } + Assert.assertEquals(actual.getStateMap(partition), expectedStateMap, "Error for partition " + partition + + " in resource " + actual.getResourceName()); + } + } + + private void validateEVsCorrect(Map actuals, + Map originals, Map swapOutInstancesToSwapInInstances, + Set inFlightSwapInInstances, Set completedSwapInInstanceNames) { + Assert.assertEquals(actuals.keySet(), originals.keySet()); + for (String resource : actuals.keySet()) { + validateEVCorrect(actuals.get(resource), originals.get(resource), + swapOutInstancesToSwapInInstances, inFlightSwapInInstances, completedSwapInInstanceNames); + } + } private void validateAssignmentInEv(ExternalView ev) { validateAssignmentInEv(ev, REPLICA); @@ -460,10 +1295,7 @@ private void validateAssignmentInEv(ExternalView ev, int expectedNumber) { Set partitionSet = ev.getPartitionSet(); for (String partition : partitionSet) { AtomicInteger activeReplicaCount = new 
AtomicInteger(); - ev.getStateMap(partition) - .values() - .stream() - .filter(v -> v.equals("MASTER") || v.equals("LEADER") || v.equals("SLAVE") || v.equals("FOLLOWER") || v.equals("STANDBY")) + ev.getStateMap(partition).values().stream().filter(ACCEPTABLE_STATE_SET::contains) .forEach(v -> activeReplicaCount.getAndIncrement()); Assert.assertTrue(activeReplicaCount.get() >=expectedNumber); } @@ -486,10 +1318,10 @@ public synchronized Map getBestPossibleAssignment() // Set test instance capacity and partition weights ClusterConfig clusterConfig = _dataAccessor.getProperty(_dataAccessor.keyBuilder().clusterConfig()); - String testCapacityKey = "TestCapacityKey"; - clusterConfig.setInstanceCapacityKeys(Collections.singletonList(testCapacityKey)); - clusterConfig.setDefaultInstanceCapacityMap(Collections.singletonMap(testCapacityKey, 100)); - clusterConfig.setDefaultPartitionWeightMap(Collections.singletonMap(testCapacityKey, 1)); + clusterConfig.setInstanceCapacityKeys(Collections.singletonList(TEST_CAPACITY_KEY)); + clusterConfig.setDefaultInstanceCapacityMap( + Collections.singletonMap(TEST_CAPACITY_KEY, TEST_CAPACITY_VALUE)); + clusterConfig.setDefaultPartitionWeightMap(Collections.singletonMap(TEST_CAPACITY_KEY, 1)); _dataAccessor.setProperty(_dataAccessor.keyBuilder().clusterConfig(), clusterConfig); } diff --git a/helix-core/src/test/java/org/apache/helix/manager/zk/TestZkHelixAdmin.java b/helix-core/src/test/java/org/apache/helix/manager/zk/TestZkHelixAdmin.java index e1ffbb646c..59decd98e5 100644 --- a/helix-core/src/test/java/org/apache/helix/manager/zk/TestZkHelixAdmin.java +++ b/helix-core/src/test/java/org/apache/helix/manager/zk/TestZkHelixAdmin.java @@ -898,24 +898,28 @@ public void testGetDomainInformation() { InstanceConfig instanceConfig = new InstanceConfig(instanceName); instanceConfig.setHostName(hostname); instanceConfig.setPort(port); - if (i == 40) { - instanceConfig.setDomain(String - .format("invaliddomain=%s,zone=%s,rack=%s,host=%s", 
"mygroup" + i % 2, "myzone" + i % 4, - "myrack" + i % 4, hostname)); - } else if (i == 41) { - instanceConfig.setDomain("invaliddomain"); - } else { - String domain = String - .format("group=%s,zone=%s,rack=%s,host=%s", "mygroup" + i % 2, "myzone" + i % 4, - "myrack" + i % 4, hostname); - instanceConfig.setDomain(domain); - } + + String domain = + String.format("group=%s,zone=%s,rack=%s,host=%s", "mygroup" + i % 2, "myzone" + i % 4, + "myrack" + i % 4, hostname); + instanceConfig.setDomain(domain); + LiveInstance liveInstance = new LiveInstance(instanceName); liveInstance.setSessionId(UUID.randomUUID().toString()); liveInstance.setHelixVersion(UUID.randomUUID().toString()); accessor.setProperty(keyBuilder.liveInstance(instanceName), liveInstance); admin.addInstance(clusterName, instanceConfig); admin.enableInstance(clusterName, instanceName, true); + + if (i == 40) { + instanceConfig.setDomain( + String.format("invaliddomain=%s,zone=%s,rack=%s,host=%s", "mygroup" + i % 2, + "myzone" + i % 4, "myrack" + i % 4, hostname)); + admin.setInstanceConfig(clusterName, instanceName, instanceConfig); + } else if (i == 41) { + instanceConfig.setDomain("invaliddomain"); + admin.setInstanceConfig(clusterName, instanceName, instanceConfig); + } } ClusterTopology clusterTopology = admin.getClusterTopology(clusterName); diff --git a/helix-core/src/test/java/org/apache/helix/mock/MockHelixAdmin.java b/helix-core/src/test/java/org/apache/helix/mock/MockHelixAdmin.java index 512a7b4db7..aa93bc9c88 100644 --- a/helix-core/src/test/java/org/apache/helix/mock/MockHelixAdmin.java +++ b/helix-core/src/test/java/org/apache/helix/mock/MockHelixAdmin.java @@ -556,6 +556,16 @@ public boolean isEvacuateFinished(String clusterName, String instancesNames) { return false; } + @Override + public boolean canCompleteSwap(String clusterName, String instancesNames) { + return false; + } + + @Override + public boolean completeSwapIfPossible(String clusterName, String instanceName) { + return false; + } 
+ @Override public boolean isReadyForPreparingJoiningCluster(String clusterName, String instancesNames) { return false; diff --git a/helix-rest/src/main/java/org/apache/helix/rest/server/resources/AbstractResource.java b/helix-rest/src/main/java/org/apache/helix/rest/server/resources/AbstractResource.java index 48b467eaac..64fbaff412 100644 --- a/helix-rest/src/main/java/org/apache/helix/rest/server/resources/AbstractResource.java +++ b/helix-rest/src/main/java/org/apache/helix/rest/server/resources/AbstractResource.java @@ -86,6 +86,8 @@ public enum Command { getInstance, getAllInstances, setInstanceOperation, // TODO: Name is just a place holder, may change in future + canCompleteSwap, + completeSwapIfPossible, onDemandRebalance } diff --git a/helix-rest/src/main/java/org/apache/helix/rest/server/resources/helix/PerInstanceAccessor.java b/helix-rest/src/main/java/org/apache/helix/rest/server/resources/helix/PerInstanceAccessor.java index efc3ce6521..b920f66ce8 100644 --- a/helix-rest/src/main/java/org/apache/helix/rest/server/resources/helix/PerInstanceAccessor.java +++ b/helix-rest/src/main/java/org/apache/helix/rest/server/resources/helix/PerInstanceAccessor.java @@ -427,52 +427,63 @@ public Response updateInstance(@PathParam("clusterId") String clusterId, } admin.resetPartition(clusterId, instanceName, node.get(PerInstanceProperties.resource.name()).textValue(), - (List) OBJECT_MAPPER - .readValue(node.get(PerInstanceProperties.partitions.name()).toString(), - OBJECT_MAPPER.getTypeFactory() - .constructCollectionType(List.class, String.class))); - break; - case setInstanceOperation: - admin.setInstanceOperation(clusterId, instanceName, state); - break; - case addInstanceTag: - if (!validInstance(node, instanceName)) { - return badRequest("Instance names are not match!"); - } - for (String tag : (List) OBJECT_MAPPER - .readValue(node.get(PerInstanceProperties.instanceTags.name()).toString(), - OBJECT_MAPPER.getTypeFactory().constructCollectionType(List.class, 
String.class))) { - admin.addInstanceTag(clusterId, instanceName, tag); - } - break; - case removeInstanceTag: - if (!validInstance(node, instanceName)) { - return badRequest("Instance names are not match!"); - } - for (String tag : (List) OBJECT_MAPPER - .readValue(node.get(PerInstanceProperties.instanceTags.name()).toString(), - OBJECT_MAPPER.getTypeFactory().constructCollectionType(List.class, String.class))) { - admin.removeInstanceTag(clusterId, instanceName, tag); - } - break; - case enablePartitions: - admin.enablePartition(true, clusterId, instanceName, - node.get(PerInstanceProperties.resource.name()).textValue(), - (List) OBJECT_MAPPER - .readValue(node.get(PerInstanceProperties.partitions.name()).toString(), - OBJECT_MAPPER.getTypeFactory() - .constructCollectionType(List.class, String.class))); - break; - case disablePartitions: - admin.enablePartition(false, clusterId, instanceName, - node.get(PerInstanceProperties.resource.name()).textValue(), - (List) OBJECT_MAPPER - .readValue(node.get(PerInstanceProperties.partitions.name()).toString(), - OBJECT_MAPPER.getTypeFactory().constructCollectionType(List.class, String.class))); - break; - default: - LOG.error("Unsupported command :" + command); - return badRequest("Unsupported command :" + command); + (List) OBJECT_MAPPER.readValue( + node.get(PerInstanceProperties.partitions.name()).toString(), + OBJECT_MAPPER.getTypeFactory() + .constructCollectionType(List.class, String.class))); + break; + case setInstanceOperation: + admin.setInstanceOperation(clusterId, instanceName, state); + break; + case canCompleteSwap: + if (!admin.canCompleteSwap(clusterId, instanceName)) { + return badRequest("Swap is not ready to be completed!"); + } + break; + case completeSwapIfPossible: + if (!admin.completeSwapIfPossible(clusterId, instanceName)) { + return badRequest("Swap is not ready to be completed!"); + } + break; + case addInstanceTag: + if (!validInstance(node, instanceName)) { + return badRequest("Instance names 
are not match!"); + } + for (String tag : (List) OBJECT_MAPPER.readValue( + node.get(PerInstanceProperties.instanceTags.name()).toString(), + OBJECT_MAPPER.getTypeFactory().constructCollectionType(List.class, String.class))) { + admin.addInstanceTag(clusterId, instanceName, tag); + } + break; + case removeInstanceTag: + if (!validInstance(node, instanceName)) { + return badRequest("Instance names are not match!"); + } + for (String tag : (List) OBJECT_MAPPER.readValue( + node.get(PerInstanceProperties.instanceTags.name()).toString(), + OBJECT_MAPPER.getTypeFactory().constructCollectionType(List.class, String.class))) { + admin.removeInstanceTag(clusterId, instanceName, tag); + } + break; + case enablePartitions: + admin.enablePartition(true, clusterId, instanceName, + node.get(PerInstanceProperties.resource.name()).textValue(), + (List) OBJECT_MAPPER.readValue( + node.get(PerInstanceProperties.partitions.name()).toString(), + OBJECT_MAPPER.getTypeFactory() + .constructCollectionType(List.class, String.class))); + break; + case disablePartitions: + admin.enablePartition(false, clusterId, instanceName, + node.get(PerInstanceProperties.resource.name()).textValue(), + (List) OBJECT_MAPPER.readValue( + node.get(PerInstanceProperties.partitions.name()).toString(), + OBJECT_MAPPER.getTypeFactory() + .constructCollectionType(List.class, String.class))); + break; + default: + LOG.error("Unsupported command :" + command); + return badRequest("Unsupported command :" + command); } } catch (Exception e) { LOG.error("Failed in updating instance : " + instanceName, e); From 595f1cc53d927fdca143b77eeb97f125c9b45184 Mon Sep 17 00:00:00 2001 From: Xiaxuan Gao <32374858+MarkGaox@users.noreply.github.com> Date: Fri, 3 Nov 2023 17:29:20 -0700 Subject: [PATCH 04/11] Enhanced stoppable checks with node evacuation filtering and introduced blacklisting capabilities (#2687) Enhanced stoppable checks with node evacuation filtering and introduced blacklisting capabilities --- 
.../helix/util/InstanceValidationUtil.java | 9 +- .../util/TestInstanceValidationUtil.java | 2 +- .../MaintenanceManagementService.java | 115 +++++++++++++++++- .../StoppableInstancesSelector.java | 42 ++++++- .../resources/helix/InstancesAccessor.java | 44 ++++++- .../helix/rest/server/AbstractTestClass.java | 2 - .../rest/server/TestInstancesAccessor.java | 99 ++++++++++++++- 7 files changed, 291 insertions(+), 22 deletions(-) diff --git a/helix-core/src/main/java/org/apache/helix/util/InstanceValidationUtil.java b/helix-core/src/main/java/org/apache/helix/util/InstanceValidationUtil.java index 5f179e784e..2542ecf7fb 100644 --- a/helix-core/src/main/java/org/apache/helix/util/InstanceValidationUtil.java +++ b/helix-core/src/main/java/org/apache/helix/util/InstanceValidationUtil.java @@ -295,7 +295,7 @@ public static Map> perPartitionHealthCheck(List entry : stateByInstanceMap.entrySet()) { - if (!entry.getKey().equals(instanceName) && (toBeStoppedInstances == null - || !toBeStoppedInstances.contains(entry.getKey())) && !unhealthyStates.contains( - entry.getValue())) { + String siblingInstanceName = entry.getKey(); + if (!siblingInstanceName.equals(instanceName) && (toBeStoppedInstances == null + || !toBeStoppedInstances.contains(siblingInstanceName)) + && !unhealthyStates.contains(entry.getValue())) { numHealthySiblings++; } } diff --git a/helix-core/src/test/java/org/apache/helix/util/TestInstanceValidationUtil.java b/helix-core/src/test/java/org/apache/helix/util/TestInstanceValidationUtil.java index aa1ba32290..79b0fdce81 100644 --- a/helix-core/src/test/java/org/apache/helix/util/TestInstanceValidationUtil.java +++ b/helix-core/src/test/java/org/apache/helix/util/TestInstanceValidationUtil.java @@ -375,7 +375,7 @@ public void TestSiblingNodesActiveReplicaCheck_success() { String resource = "resource"; Mock mock = new Mock(); doReturn(ImmutableList.of(resource)).when(mock.dataAccessor) - .getChildNames(argThat(new 
PropertyKeyArgument(PropertyType.EXTERNALVIEW))); + .getChildNames(argThat(new PropertyKeyArgument(PropertyType.IDEALSTATES))); // set ideal state IdealState idealState = mock(IdealState.class); when(idealState.isEnabled()).thenReturn(true); diff --git a/helix-rest/src/main/java/org/apache/helix/rest/clusterMaintenanceService/MaintenanceManagementService.java b/helix-rest/src/main/java/org/apache/helix/rest/clusterMaintenanceService/MaintenanceManagementService.java index c3fa04966f..52377e612f 100644 --- a/helix-rest/src/main/java/org/apache/helix/rest/clusterMaintenanceService/MaintenanceManagementService.java +++ b/helix-rest/src/main/java/org/apache/helix/rest/clusterMaintenanceService/MaintenanceManagementService.java @@ -93,6 +93,10 @@ public class MaintenanceManagementService { private final HelixDataAccessorWrapper _dataAccessor; private final Set _nonBlockingHealthChecks; private final Set _skipHealthCheckCategories; + // Set the default value of _skipStoppableHealthCheckList to be an empty list to + // maintain the backward compatibility with users who don't use MaintenanceManagementServiceBuilder + // to create the MaintenanceManagementService object. 
+ private List _skipStoppableHealthCheckList = Collections.emptyList(); public MaintenanceManagementService(ZKHelixDataAccessor dataAccessor, ConfigAccessor configAccessor, boolean skipZKRead, String namespace) { @@ -144,6 +148,25 @@ public MaintenanceManagementService(ZKHelixDataAccessor dataAccessor, _namespace = namespace; } + private MaintenanceManagementService(ZKHelixDataAccessor dataAccessor, + ConfigAccessor configAccessor, CustomRestClient customRestClient, boolean skipZKRead, + Set nonBlockingHealthChecks, Set skipHealthCheckCategories, + List skipStoppableHealthCheckList, String namespace) { + _dataAccessor = + new HelixDataAccessorWrapper(dataAccessor, customRestClient, + namespace); + _configAccessor = configAccessor; + _customRestClient = customRestClient; + _skipZKRead = skipZKRead; + _nonBlockingHealthChecks = + nonBlockingHealthChecks == null ? Collections.emptySet() : nonBlockingHealthChecks; + _skipHealthCheckCategories = + skipHealthCheckCategories == null ? Collections.emptySet() : skipHealthCheckCategories; + _skipStoppableHealthCheckList = skipStoppableHealthCheckList == null ? Collections.emptyList() + : skipStoppableHealthCheckList; + _namespace = namespace; + } + /** * Perform health check and maintenance operation check and execution for a instance in * one cluster. 
@@ -463,7 +486,10 @@ private List batchCustomInstanceStoppableCheck(String clusterId, List toBeStoppedInstances) { LOG.info("Perform helix own custom health checks for {}/{}", clusterId, instanceName); + List healthChecksToExecute = new ArrayList<>(HealthCheck.STOPPABLE_CHECK_LIST); + healthChecksToExecute.removeAll(_skipStoppableHealthCheckList); Map helixStoppableCheck = - getInstanceHealthStatus(clusterId, instanceName, HealthCheck.STOPPABLE_CHECK_LIST, + getInstanceHealthStatus(clusterId, instanceName, healthChecksToExecute, toBeStoppedInstances); return new StoppableCheck(helixStoppableCheck, StoppableCheck.Category.HELIX_OWN_CHECK); @@ -771,4 +799,87 @@ protected Map getInstanceHealthStatus(String clusterId, String return healthStatus; } + + public static class MaintenanceManagementServiceBuilder { + private ConfigAccessor _configAccessor; + private boolean _skipZKRead; + private String _namespace; + private ZKHelixDataAccessor _dataAccessor; + private CustomRestClient _customRestClient; + private Set _nonBlockingHealthChecks; + private Set _skipHealthCheckCategories = Collections.emptySet(); + private List _skipStoppableHealthCheckList = Collections.emptyList(); + + public MaintenanceManagementServiceBuilder setConfigAccessor(ConfigAccessor configAccessor) { + _configAccessor = configAccessor; + return this; + } + + public MaintenanceManagementServiceBuilder setSkipZKRead(boolean skipZKRead) { + _skipZKRead = skipZKRead; + return this; + } + + public MaintenanceManagementServiceBuilder setNamespace(String namespace) { + _namespace = namespace; + return this; + } + + public MaintenanceManagementServiceBuilder setDataAccessor( + ZKHelixDataAccessor dataAccessor) { + _dataAccessor = dataAccessor; + return this; + } + + public MaintenanceManagementServiceBuilder setCustomRestClient( + CustomRestClient customRestClient) { + _customRestClient = customRestClient; + return this; + } + + public MaintenanceManagementServiceBuilder setNonBlockingHealthChecks( + Set 
nonBlockingHealthChecks) { + _nonBlockingHealthChecks = nonBlockingHealthChecks; + return this; + } + + public MaintenanceManagementServiceBuilder setSkipHealthCheckCategories( + Set skipHealthCheckCategories) { + _skipHealthCheckCategories = skipHealthCheckCategories; + return this; + } + + public MaintenanceManagementServiceBuilder setSkipStoppableHealthCheckList( + List skipStoppableHealthCheckList) { + _skipStoppableHealthCheckList = skipStoppableHealthCheckList; + return this; + } + + public MaintenanceManagementService build() { + validate(); + return new MaintenanceManagementService(_dataAccessor, _configAccessor, _customRestClient, + _skipZKRead, _nonBlockingHealthChecks, _skipHealthCheckCategories, + _skipStoppableHealthCheckList, _namespace); + } + + private void validate() throws IllegalArgumentException { + List msg = new ArrayList<>(); + if (_configAccessor == null) { + msg.add("'configAccessor' can't be null."); + } + if (_namespace == null) { + msg.add("'namespace' can't be null."); + } + if (_dataAccessor == null) { + msg.add("'_dataAccessor' can't be null."); + } + if (_customRestClient == null) { + msg.add("'customRestClient' can't be null."); + } + if (msg.size() != 0) { + throw new IllegalArgumentException( + "One or more mandatory arguments are not set " + msg); + } + } + } } diff --git a/helix-rest/src/main/java/org/apache/helix/rest/clusterMaintenanceService/StoppableInstancesSelector.java b/helix-rest/src/main/java/org/apache/helix/rest/clusterMaintenanceService/StoppableInstancesSelector.java index 8cf8bc83cb..877aaa9c89 100644 --- a/helix-rest/src/main/java/org/apache/helix/rest/clusterMaintenanceService/StoppableInstancesSelector.java +++ b/helix-rest/src/main/java/org/apache/helix/rest/clusterMaintenanceService/StoppableInstancesSelector.java @@ -34,6 +34,10 @@ import com.fasterxml.jackson.databind.node.ArrayNode; import com.fasterxml.jackson.databind.node.JsonNodeFactory; import com.fasterxml.jackson.databind.node.ObjectNode; +import 
org.apache.helix.PropertyKey; +import org.apache.helix.constants.InstanceConstants; +import org.apache.helix.manager.zk.ZKHelixDataAccessor; +import org.apache.helix.model.InstanceConfig; import org.apache.helix.rest.server.json.cluster.ClusterTopology; import org.apache.helix.rest.server.json.instance.StoppableCheck; import org.apache.helix.rest.server.resources.helix.InstancesAccessor; @@ -48,15 +52,17 @@ public class StoppableInstancesSelector { private final String _customizedInput; private final MaintenanceManagementService _maintenanceService; private final ClusterTopology _clusterTopology; + private final ZKHelixDataAccessor _dataAccessor; - public StoppableInstancesSelector(String clusterId, List orderOfZone, + private StoppableInstancesSelector(String clusterId, List orderOfZone, String customizedInput, MaintenanceManagementService maintenanceService, - ClusterTopology clusterTopology) { + ClusterTopology clusterTopology, ZKHelixDataAccessor dataAccessor) { _clusterId = clusterId; _orderOfZone = orderOfZone; _customizedInput = customizedInput; _maintenanceService = maintenanceService; _clusterTopology = clusterTopology; + _dataAccessor = dataAccessor; } /** @@ -66,7 +72,7 @@ public StoppableInstancesSelector(String clusterId, List orderOfZone, * reasons for non-stoppability. * * @param instances A list of instance to be evaluated. - * @param toBeStoppedInstances A list of instances presumed to be are already stopped + * @param toBeStoppedInstances A list of instances presumed to be already stopped * @return An ObjectNode containing: * - 'stoppableNode': List of instances that can be stopped. 
* - 'instance_not_stoppable_with_reasons': A map with the instance name as the key and @@ -81,6 +87,7 @@ public ObjectNode getStoppableInstancesInSingleZone(List instances, ObjectNode failedStoppableInstances = result.putObject( InstancesAccessor.InstancesProperties.instance_not_stoppable_with_reasons.name()); Set toBeStoppedInstancesSet = new HashSet<>(toBeStoppedInstances); + collectEvacuatingInstances(toBeStoppedInstancesSet); List zoneBasedInstance = getZoneBasedInstances(instances, _clusterTopology.toZoneMapping()); @@ -97,7 +104,7 @@ public ObjectNode getStoppableInstancesInSingleZone(List instances, * non-stoppability. * * @param instances A list of instance to be evaluated. - * @param toBeStoppedInstances A list of instances presumed to be are already stopped + * @param toBeStoppedInstances A list of instances presumed to be already stopped * @return An ObjectNode containing: * - 'stoppableNode': List of instances that can be stopped. * - 'instance_not_stoppable_with_reasons': A map with the instance name as the key and @@ -112,6 +119,7 @@ public ObjectNode getStoppableInstancesCrossZones(List instances, ObjectNode failedStoppableInstances = result.putObject( InstancesAccessor.InstancesProperties.instance_not_stoppable_with_reasons.name()); Set toBeStoppedInstancesSet = new HashSet<>(toBeStoppedInstances); + collectEvacuatingInstances(toBeStoppedInstancesSet); Map> zoneMapping = _clusterTopology.toZoneMapping(); for (String zone : _orderOfZone) { @@ -249,12 +257,31 @@ private Map> getOrderedZoneToInstancesMap( (existing, replacement) -> existing, LinkedHashMap::new)); } + /** + * Collect instances marked for evacuation in the current topology and add them into the given set + * + * @param toBeStoppedInstances A set of instances we presume to be stopped. 
+ */ + private void collectEvacuatingInstances(Set toBeStoppedInstances) { + Set allInstances = _clusterTopology.getAllInstances(); + for (String instance : allInstances) { + PropertyKey.Builder propertyKeyBuilder = _dataAccessor.keyBuilder(); + InstanceConfig instanceConfig = + _dataAccessor.getProperty(propertyKeyBuilder.instanceConfig(instance)); + if (InstanceConstants.InstanceOperation.EVACUATE.name() + .equals(instanceConfig.getInstanceOperation())) { + toBeStoppedInstances.add(instance); + } + } + } + public static class StoppableInstancesSelectorBuilder { private String _clusterId; private List _orderOfZone; private String _customizedInput; private MaintenanceManagementService _maintenanceService; private ClusterTopology _clusterTopology; + private ZKHelixDataAccessor _dataAccessor; public StoppableInstancesSelectorBuilder setClusterId(String clusterId) { _clusterId = clusterId; @@ -282,9 +309,14 @@ public StoppableInstancesSelectorBuilder setClusterTopology(ClusterTopology clus return this; } + public StoppableInstancesSelectorBuilder setDataAccessor(ZKHelixDataAccessor dataAccessor) { + _dataAccessor = dataAccessor; + return this; + } + public StoppableInstancesSelector build() { return new StoppableInstancesSelector(_clusterId, _orderOfZone, _customizedInput, - _maintenanceService, _clusterTopology); + _maintenanceService, _clusterTopology, _dataAccessor); } } } diff --git a/helix-rest/src/main/java/org/apache/helix/rest/server/resources/helix/InstancesAccessor.java b/helix-rest/src/main/java/org/apache/helix/rest/server/resources/helix/InstancesAccessor.java index 785195ebe1..fcad387dce 100644 --- a/helix-rest/src/main/java/org/apache/helix/rest/server/resources/helix/InstancesAccessor.java +++ b/helix-rest/src/main/java/org/apache/helix/rest/server/resources/helix/InstancesAccessor.java @@ -20,12 +20,12 @@ */ import java.io.IOException; -import java.util.ArrayList; import java.util.Collections; import java.util.HashSet; import java.util.List; import 
java.util.Map; import java.util.Set; +import java.util.stream.Collectors; import javax.ws.rs.DefaultValue; import javax.ws.rs.GET; import javax.ws.rs.POST; @@ -46,6 +46,8 @@ import org.apache.helix.manager.zk.ZKHelixDataAccessor; import org.apache.helix.model.ClusterConfig; import org.apache.helix.model.InstanceConfig; +import org.apache.helix.rest.client.CustomRestClientFactory; +import org.apache.helix.rest.clusterMaintenanceService.HealthCheck; import org.apache.helix.rest.clusterMaintenanceService.MaintenanceManagementService; import org.apache.helix.rest.common.HttpConstants; import org.apache.helix.rest.clusterMaintenanceService.StoppableInstancesSelector; @@ -59,10 +61,13 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import static org.apache.helix.rest.clusterMaintenanceService.MaintenanceManagementService.ALL_HEALTH_CHECK_NONBLOCK; + @ClusterAuth @Path("/clusters/{clusterId}/instances") public class InstancesAccessor extends AbstractHelixResource { private final static Logger _logger = LoggerFactory.getLogger(InstancesAccessor.class); + public enum InstancesProperties { instances, online, @@ -70,6 +75,7 @@ public enum InstancesProperties { selection_base, zone_order, to_be_stopped_instances, + skip_stoppable_check_list, customized_values, instance_stoppable_parallel, instance_not_stoppable_with_reasons @@ -228,6 +234,9 @@ private Response batchGetStoppableInstances(String clusterId, JsonNode node, boo List orderOfZone = null; String customizedInput = null; List toBeStoppedInstances = Collections.emptyList(); + // By default, if skip_stoppable_check_list is unset, all checks are performed to maintain + // backward compatibility with existing clients. 
+ List skipStoppableCheckList = Collections.emptyList(); if (node.get(InstancesAccessor.InstancesProperties.customized_values.name()) != null) { customizedInput = node.get(InstancesAccessor.InstancesProperties.customized_values.name()).toString(); @@ -260,10 +269,36 @@ private Response batchGetStoppableInstances(String clusterId, JsonNode node, boo } } + if (node.get(InstancesProperties.skip_stoppable_check_list.name()) != null) { + List list = OBJECT_MAPPER.readValue( + node.get(InstancesProperties.skip_stoppable_check_list.name()).toString(), + OBJECT_MAPPER.getTypeFactory().constructCollectionType(List.class, String.class)); + try { + skipStoppableCheckList = + list.stream().map(HealthCheck::valueOf).collect(Collectors.toList()); + } catch (IllegalArgumentException e) { + String message = + "'skip_stoppable_check_list' has invalid check names: " + list + + ". Supported checks: " + HealthCheck.STOPPABLE_CHECK_LIST; + _logger.error(message, e); + return badRequest(message); + } + } + + String namespace = getNamespace(); MaintenanceManagementService maintenanceService = - new MaintenanceManagementService((ZKHelixDataAccessor) getDataAccssor(clusterId), - getConfigAccessor(), skipZKRead, continueOnFailures, skipHealthCheckCategories, - getNamespace()); + new MaintenanceManagementService.MaintenanceManagementServiceBuilder() + .setDataAccessor((ZKHelixDataAccessor) getDataAccssor(clusterId)) + .setConfigAccessor(getConfigAccessor()) + .setSkipZKRead(skipZKRead) + .setNonBlockingHealthChecks( + continueOnFailures ? 
Collections.singleton(ALL_HEALTH_CHECK_NONBLOCK) : null) + .setCustomRestClient(CustomRestClientFactory.get()) + .setSkipHealthCheckCategories(skipHealthCheckCategories) + .setNamespace(namespace) + .setSkipStoppableHealthCheckList(skipStoppableCheckList) + .build(); + ClusterService clusterService = new ClusterServiceImpl(getDataAccssor(clusterId), getConfigAccessor()); ClusterTopology clusterTopology = clusterService.getClusterTopology(clusterId); @@ -274,6 +309,7 @@ private Response batchGetStoppableInstances(String clusterId, JsonNode node, boo .setCustomizedInput(customizedInput) .setMaintenanceService(maintenanceService) .setClusterTopology(clusterTopology) + .setDataAccessor((ZKHelixDataAccessor) getDataAccssor(clusterId)) .build(); stoppableInstancesSelector.calculateOrderOfZone(instances, random); ObjectNode result; diff --git a/helix-rest/src/test/java/org/apache/helix/rest/server/AbstractTestClass.java b/helix-rest/src/test/java/org/apache/helix/rest/server/AbstractTestClass.java index 68561ce839..6b357a384e 100644 --- a/helix-rest/src/test/java/org/apache/helix/rest/server/AbstractTestClass.java +++ b/helix-rest/src/test/java/org/apache/helix/rest/server/AbstractTestClass.java @@ -621,8 +621,6 @@ private void preSetupForCrosszoneParallelInstancesStoppableTest(String clusterNa clusterConfig.setFaultZoneType("helixZoneId"); clusterConfig.setPersistIntermediateAssignment(true); _configAccessor.setClusterConfig(clusterName, clusterConfig); - RESTConfig emptyRestConfig = new RESTConfig(clusterName); - _configAccessor.setRESTConfig(clusterName, emptyRestConfig); // Create instance configs List instanceConfigs = new ArrayList<>(); int perZoneInstancesCount = 3; diff --git a/helix-rest/src/test/java/org/apache/helix/rest/server/TestInstancesAccessor.java b/helix-rest/src/test/java/org/apache/helix/rest/server/TestInstancesAccessor.java index 2bc539a4d4..92dfff0024 100644 --- a/helix-rest/src/test/java/org/apache/helix/rest/server/TestInstancesAccessor.java +++ 
b/helix-rest/src/test/java/org/apache/helix/rest/server/TestInstancesAccessor.java @@ -34,6 +34,7 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import org.apache.helix.TestHelper; +import org.apache.helix.constants.InstanceConstants; import org.apache.helix.model.ClusterConfig; import org.apache.helix.model.InstanceConfig; import org.apache.helix.rest.server.resources.helix.InstancesAccessor; @@ -113,7 +114,7 @@ public void testInstanceStoppableZoneBasedWithToBeStoppedInstances() throws IOEx System.out.println("End test :" + TestHelper.getTestMethodName()); } - @Test + @Test(dependsOnMethods = "testInstanceStoppableZoneBasedWithToBeStoppedInstances") public void testInstanceStoppableZoneBasedWithoutZoneOrder() throws IOException { System.out.println("Start test :" + TestHelper.getTestMethodName()); String content = String.format( @@ -144,7 +145,8 @@ public void testInstanceStoppableZoneBasedWithoutZoneOrder() throws IOException System.out.println("End test :" + TestHelper.getTestMethodName()); } - @Test(dataProvider = "generatePayloadCrossZoneStoppableCheckWithZoneOrder") + @Test(dataProvider = "generatePayloadCrossZoneStoppableCheckWithZoneOrder", + dependsOnMethods = "testInstanceStoppableZoneBasedWithoutZoneOrder") public void testCrossZoneStoppableWithZoneOrder(String content) throws IOException { System.out.println("Start test :" + TestHelper.getTestMethodName()); Response response = new JerseyUriRequestBuilder( @@ -166,7 +168,7 @@ public void testCrossZoneStoppableWithZoneOrder(String content) throws IOExcepti System.out.println("End test :" + TestHelper.getTestMethodName()); } - @Test + @Test(dependsOnMethods = "testCrossZoneStoppableWithZoneOrder") public void testCrossZoneStoppableWithoutZoneOrder() throws IOException { System.out.println("Start test :" + TestHelper.getTestMethodName()); String content = String.format( @@ -199,8 +201,97 @@ public void testCrossZoneStoppableWithoutZoneOrder() throws 
IOException { System.out.println("End test :" + TestHelper.getTestMethodName()); } + @Test(dependsOnMethods = "testCrossZoneStoppableWithoutZoneOrder") + public void testInstanceStoppableCrossZoneBasedWithSelectedCheckList() throws IOException { + System.out.println("Start test :" + TestHelper.getTestMethodName()); + // Select instances with cross zone based and perform all checks + String content = + String.format("{\"%s\":\"%s\",\"%s\":[\"%s\",\"%s\",\"%s\",\"%s\",\"%s\",\"%s\", \"%s\"], \"%s\":[\"%s\"]}", + InstancesAccessor.InstancesProperties.selection_base.name(), + InstancesAccessor.InstanceHealthSelectionBase.cross_zone_based.name(), + InstancesAccessor.InstancesProperties.instances.name(), "instance0", "instance1", + "instance2", "instance3", "instance4", "instance5", "invalidInstance", + InstancesAccessor.InstancesProperties.skip_stoppable_check_list.name(), "DUMMY_TEST_NO_EXISTS"); - @Test(dependsOnMethods = "testInstanceStoppableZoneBasedWithToBeStoppedInstances") + new JerseyUriRequestBuilder("clusters/{}/instances?command=stoppable").format(STOPPABLE_CLUSTER) + .isBodyReturnExpected(true) + .expectedReturnStatusCode(Response.Status.BAD_REQUEST.getStatusCode()) + .post(this, Entity.entity(content, MediaType.APPLICATION_JSON_TYPE)); + + // Select instances with cross zone based and perform a subset of checks + content = String.format( + "{\"%s\":\"%s\",\"%s\":[\"%s\",\"%s\",\"%s\",\"%s\",\"%s\",\"%s\", \"%s\"], \"%s\":[\"%s\",\"%s\"], \"%s\":[\"%s\", \"%s\"]}", + InstancesAccessor.InstancesProperties.selection_base.name(), + InstancesAccessor.InstanceHealthSelectionBase.cross_zone_based.name(), + InstancesAccessor.InstancesProperties.instances.name(), "instance0", "instance1", + "instance2", "instance3", "instance4", "instance5", "invalidInstance", + InstancesAccessor.InstancesProperties.zone_order.name(), "zone2", "zone1", + InstancesAccessor.InstancesProperties.skip_stoppable_check_list.name(), "INSTANCE_NOT_ENABLED", "INSTANCE_NOT_STABLE"); + 
Response response = new JerseyUriRequestBuilder( + "clusters/{}/instances?command=stoppable&skipHealthCheckCategories=CUSTOM_INSTANCE_CHECK,CUSTOM_PARTITION_CHECK").format( + STOPPABLE_CLUSTER).post(this, Entity.entity(content, MediaType.APPLICATION_JSON_TYPE)); + JsonNode jsonNode = OBJECT_MAPPER.readTree(response.readEntity(String.class)); + JsonNode nonStoppableInstances = jsonNode.get( + InstancesAccessor.InstancesProperties.instance_not_stoppable_with_reasons.name()); + Assert.assertEquals(getStringSet(nonStoppableInstances, "instance5"), + ImmutableSet.of("HELIX:EMPTY_RESOURCE_ASSIGNMENT", "HELIX:INSTANCE_NOT_ALIVE")); + Assert.assertEquals(getStringSet(nonStoppableInstances, "instance4"), + ImmutableSet.of("HELIX:EMPTY_RESOURCE_ASSIGNMENT", "HELIX:INSTANCE_NOT_ALIVE")); + Assert.assertEquals(getStringSet(nonStoppableInstances, "instance1"), + ImmutableSet.of("HELIX:EMPTY_RESOURCE_ASSIGNMENT")); + Assert.assertEquals(getStringSet(nonStoppableInstances, "invalidInstance"), + ImmutableSet.of("HELIX:INSTANCE_NOT_EXIST")); + + System.out.println("End test :" + TestHelper.getTestMethodName()); + } + + @Test(dependsOnMethods = "testInstanceStoppableCrossZoneBasedWithSelectedCheckList") + public void testInstanceStoppableCrossZoneBasedWithEvacuatingInstances() throws IOException { + System.out.println("Start test :" + TestHelper.getTestMethodName()); + String content = String.format( + "{\"%s\":\"%s\",\"%s\":[\"%s\",\"%s\",\"%s\",\"%s\", \"%s\", \"%s\", \"%s\",\"%s\", \"%s\", \"%s\"]}", + InstancesAccessor.InstancesProperties.selection_base.name(), + InstancesAccessor.InstanceHealthSelectionBase.cross_zone_based.name(), + InstancesAccessor.InstancesProperties.instances.name(), "instance1", "instance3", + "instance6", "instance9", "instance10", "instance11", "instance12", "instance13", + "instance14", "invalidInstance"); + + // Change instance config of instance1 & instance0 to be evacuating + String instance0 = "instance0"; + InstanceConfig instanceConfig = 
_configAccessor.getInstanceConfig(STOPPABLE_CLUSTER2, instance0); + instanceConfig.setInstanceOperation(InstanceConstants.InstanceOperation.EVACUATE); + _configAccessor.setInstanceConfig(STOPPABLE_CLUSTER2, instance0, instanceConfig); + String instance1 = "instance1"; + InstanceConfig instanceConfig1 = _configAccessor.getInstanceConfig(STOPPABLE_CLUSTER2, instance1); + instanceConfig1.setInstanceOperation(InstanceConstants.InstanceOperation.EVACUATE); + _configAccessor.setInstanceConfig(STOPPABLE_CLUSTER2, instance1, instanceConfig1); + // It takes time to reflect the changes. + BestPossibleExternalViewVerifier verifier = + new BestPossibleExternalViewVerifier.Builder(STOPPABLE_CLUSTER2).setZkAddr(ZK_ADDR).build(); + Assert.assertTrue(verifier.verifyByPolling()); + + Response response = new JerseyUriRequestBuilder( + "clusters/{}/instances?command=stoppable&skipHealthCheckCategories=CUSTOM_INSTANCE_CHECK,CUSTOM_PARTITION_CHECK").format( + STOPPABLE_CLUSTER2).post(this, Entity.entity(content, MediaType.APPLICATION_JSON_TYPE)); + JsonNode jsonNode = OBJECT_MAPPER.readTree(response.readEntity(String.class)); + + Set stoppableSet = getStringSet(jsonNode, + InstancesAccessor.InstancesProperties.instance_stoppable_parallel.name()); + Assert.assertTrue(stoppableSet.contains("instance12") + && stoppableSet.contains("instance11") && stoppableSet.contains("instance10")); + + JsonNode nonStoppableInstances = jsonNode.get( + InstancesAccessor.InstancesProperties.instance_not_stoppable_with_reasons.name()); + Assert.assertEquals(getStringSet(nonStoppableInstances, "instance13"), + ImmutableSet.of("HELIX:MIN_ACTIVE_REPLICA_CHECK_FAILED")); + Assert.assertEquals(getStringSet(nonStoppableInstances, "instance14"), + ImmutableSet.of("HELIX:MIN_ACTIVE_REPLICA_CHECK_FAILED")); + Assert.assertEquals(getStringSet(nonStoppableInstances, "invalidInstance"), + ImmutableSet.of("HELIX:INSTANCE_NOT_EXIST")); + System.out.println("End test :" + TestHelper.getTestMethodName()); + } + + 
@Test(dependsOnMethods = "testInstanceStoppableCrossZoneBasedWithEvacuatingInstances") public void testInstanceStoppable_zoneBased_zoneOrder() throws IOException { System.out.println("Start test :" + TestHelper.getTestMethodName()); // Select instances with zone based From 9dff9838dcc1de6f6e086396c3dd27ff4251008d Mon Sep 17 00:00:00 2001 From: Zachary Pinto Date: Fri, 10 Nov 2023 08:56:13 -0800 Subject: [PATCH 05/11] Change canCompleteSwap and completeSwapIfPossible to return json with kv pair for result of check or attempt to complete swap. (#2697) --- .../resources/helix/PerInstanceAccessor.java | 12 ++++------- .../rest/server/TestPerInstanceAccessor.java | 20 +++++++++++++++++++ 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/helix-rest/src/main/java/org/apache/helix/rest/server/resources/helix/PerInstanceAccessor.java b/helix-rest/src/main/java/org/apache/helix/rest/server/resources/helix/PerInstanceAccessor.java index b920f66ce8..b1b4f82642 100644 --- a/helix-rest/src/main/java/org/apache/helix/rest/server/resources/helix/PerInstanceAccessor.java +++ b/helix-rest/src/main/java/org/apache/helix/rest/server/resources/helix/PerInstanceAccessor.java @@ -436,15 +436,11 @@ public Response updateInstance(@PathParam("clusterId") String clusterId, admin.setInstanceOperation(clusterId, instanceName, state); break; case canCompleteSwap: - if (!admin.canCompleteSwap(clusterId, instanceName)) { - return badRequest("Swap is not ready to be completed!"); - } - break; + return OK(OBJECT_MAPPER.writeValueAsString( + Map.of("successful", admin.canCompleteSwap(clusterId, instanceName)))); case completeSwapIfPossible: - if (!admin.completeSwapIfPossible(clusterId, instanceName)) { - return badRequest("Swap is not ready to be completed!"); - } - break; + return OK(OBJECT_MAPPER.writeValueAsString( + Map.of("successful", admin.completeSwapIfPossible(clusterId, instanceName)))); case addInstanceTag: if (!validInstance(node, instanceName)) { return 
badRequest("Instance names are not match!"); diff --git a/helix-rest/src/test/java/org/apache/helix/rest/server/TestPerInstanceAccessor.java b/helix-rest/src/test/java/org/apache/helix/rest/server/TestPerInstanceAccessor.java index 273019bd3f..a8675202a3 100644 --- a/helix-rest/src/test/java/org/apache/helix/rest/server/TestPerInstanceAccessor.java +++ b/helix-rest/src/test/java/org/apache/helix/rest/server/TestPerInstanceAccessor.java @@ -501,6 +501,26 @@ public void updateInstance() throws IOException { instanceConfig = _configAccessor.getInstanceConfig(CLUSTER_NAME, INSTANCE_NAME); Assert.assertEquals( instanceConfig.getInstanceOperation(), ""); + + // test canCompleteSwap + Response canCompleteSwapResponse = + new JerseyUriRequestBuilder("clusters/{}/instances/{}?command=canCompleteSwap").format( + CLUSTER_NAME, INSTANCE_NAME).post(this, entity); + Assert.assertEquals(canCompleteSwapResponse.getStatus(), Response.Status.OK.getStatusCode()); + Map responseMap = + OBJECT_MAPPER.readValue(canCompleteSwapResponse.readEntity(String.class), Map.class); + Assert.assertFalse((boolean) responseMap.get("successful")); + + // test completeSwapIfPossible + Response completeSwapIfPossibleResponse = new JerseyUriRequestBuilder( + "clusters/{}/instances/{}?command=completeSwapIfPossible").format(CLUSTER_NAME, + INSTANCE_NAME).post(this, entity); + Assert.assertEquals(completeSwapIfPossibleResponse.getStatus(), + Response.Status.OK.getStatusCode()); + responseMap = + OBJECT_MAPPER.readValue(completeSwapIfPossibleResponse.readEntity(String.class), Map.class); + Assert.assertFalse((boolean) responseMap.get("successful")); + System.out.println("End test :" + TestHelper.getTestMethodName()); } From 12af3ebef9de719cec97d5cf92abe4fcbf4fb290 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Grant=20Pal=C3=A1u=20Spencer?= Date: Tue, 14 Nov 2023 00:05:34 -0800 Subject: [PATCH 06/11] Expose Evacuate Finished API in Helix-Rest (#2694) --- .../server/resources/AbstractResource.java | 3 +- 
.../resources/helix/PerInstanceAccessor.java | 10 ++++++ .../rest/server/TestPerInstanceAccessor.java | 33 +++++++++++++++++++ 3 files changed, 45 insertions(+), 1 deletion(-) diff --git a/helix-rest/src/main/java/org/apache/helix/rest/server/resources/AbstractResource.java b/helix-rest/src/main/java/org/apache/helix/rest/server/resources/AbstractResource.java index 64fbaff412..ce3d27273e 100644 --- a/helix-rest/src/main/java/org/apache/helix/rest/server/resources/AbstractResource.java +++ b/helix-rest/src/main/java/org/apache/helix/rest/server/resources/AbstractResource.java @@ -88,7 +88,8 @@ public enum Command { setInstanceOperation, // TODO: Name is just a place holder, may change in future canCompleteSwap, completeSwapIfPossible, - onDemandRebalance + onDemandRebalance, + isEvacuateFinished } @Context diff --git a/helix-rest/src/main/java/org/apache/helix/rest/server/resources/helix/PerInstanceAccessor.java b/helix-rest/src/main/java/org/apache/helix/rest/server/resources/helix/PerInstanceAccessor.java index b1b4f82642..f380975a33 100644 --- a/helix-rest/src/main/java/org/apache/helix/rest/server/resources/helix/PerInstanceAccessor.java +++ b/helix-rest/src/main/java/org/apache/helix/rest/server/resources/helix/PerInstanceAccessor.java @@ -477,6 +477,16 @@ public Response updateInstance(@PathParam("clusterId") String clusterId, OBJECT_MAPPER.getTypeFactory() .constructCollectionType(List.class, String.class))); break; + case isEvacuateFinished: + boolean evacuateFinished; + try { + evacuateFinished = admin.isEvacuateFinished(clusterId, instanceName); + } catch (HelixException e) { + LOG.error(String.format("Encountered error when checking if evacuation finished for cluster: " + + "{}, instance: {}", clusterId, instanceName), e); + return serverError(e); + } + return OK(OBJECT_MAPPER.writeValueAsString(Map.of("successful", evacuateFinished))); default: LOG.error("Unsupported command :" + command); return badRequest("Unsupported command :" + command); diff --git 
a/helix-rest/src/test/java/org/apache/helix/rest/server/TestPerInstanceAccessor.java b/helix-rest/src/test/java/org/apache/helix/rest/server/TestPerInstanceAccessor.java index a8675202a3..a1f46cce96 100644 --- a/helix-rest/src/test/java/org/apache/helix/rest/server/TestPerInstanceAccessor.java +++ b/helix-rest/src/test/java/org/apache/helix/rest/server/TestPerInstanceAccessor.java @@ -521,6 +521,39 @@ public void updateInstance() throws IOException { OBJECT_MAPPER.readValue(completeSwapIfPossibleResponse.readEntity(String.class), Map.class); Assert.assertFalse((boolean) responseMap.get("successful")); + // test isEvacuateFinished on instance with EVACUATE but has currentState + new JerseyUriRequestBuilder("clusters/{}/instances/{}?command=setInstanceOperation&instanceOperation=EVACUATE") + .format(CLUSTER_NAME, INSTANCE_NAME).post(this, entity); + instanceConfig = _configAccessor.getInstanceConfig(CLUSTER_NAME, INSTANCE_NAME); + Assert.assertEquals( + instanceConfig.getInstanceOperation(), InstanceConstants.InstanceOperation.EVACUATE.toString()); + + Response response = new JerseyUriRequestBuilder("clusters/{}/instances/{}?command=isEvacuateFinished") + .format(CLUSTER_NAME, INSTANCE_NAME).post(this, entity); + Map evacuateFinishedresult = OBJECT_MAPPER.readValue(response.readEntity(String.class), Map.class); + Assert.assertEquals(response.getStatus(), Response.Status.OK.getStatusCode()); + Assert.assertFalse(evacuateFinishedresult.get("successful")); + + // test isEvacuateFinished on instance with EVACUATE and no currentState + // Create new instance so no currentState or messages assigned to it + String test_instance_name = INSTANCE_NAME + "_foo"; + InstanceConfig newInstanceConfig = new InstanceConfig(test_instance_name); + Entity instanceEntity = Entity.entity(OBJECT_MAPPER.writeValueAsString(newInstanceConfig.getRecord()), + MediaType.APPLICATION_JSON_TYPE); + new JerseyUriRequestBuilder("clusters/{}/instances/{}").format(CLUSTER_NAME, test_instance_name) + 
.put(this, instanceEntity); + + new JerseyUriRequestBuilder("clusters/{}/instances/{}?command=setInstanceOperation&instanceOperation=EVACUATE") + .format(CLUSTER_NAME, test_instance_name).post(this, entity); + instanceConfig = _configAccessor.getInstanceConfig(CLUSTER_NAME, test_instance_name); + Assert.assertEquals( + instanceConfig.getInstanceOperation(), InstanceConstants.InstanceOperation.EVACUATE.toString()); + + response = new JerseyUriRequestBuilder("clusters/{}/instances/{}?command=isEvacuateFinished") + .format(CLUSTER_NAME, test_instance_name).post(this, entity); + evacuateFinishedresult = OBJECT_MAPPER.readValue(response.readEntity(String.class), Map.class); + Assert.assertEquals(response.getStatus(), Response.Status.OK.getStatusCode()); + Assert.assertTrue(evacuateFinishedresult.get("successful")); System.out.println("End test :" + TestHelper.getTestMethodName()); } From 194d4e8a0cf4ab8e5aa76cd5e32d439c10ad3d53 Mon Sep 17 00:00:00 2001 From: Zachary Pinto Date: Tue, 28 Nov 2023 11:06:02 -0800 Subject: [PATCH 07/11] Fix WAGED to only use logicalId when computing baseline and centralize picking assignable instances in the cache. (#2702) Fix WAGED to only use logicalId when computing baseline and centralize picking assignable instances in the cache. 
--- .../ResourceChangeDetector.java | 4 +- .../ResourceChangeSnapshot.java | 41 +- .../BaseControllerDataProvider.java | 365 +++++++++++++----- .../WorkflowControllerDataProvider.java | 3 +- .../rebalancer/AbstractRebalancer.java | 11 +- .../controller/rebalancer/AutoRebalancer.java | 34 +- .../rebalancer/CustomRebalancer.java | 4 +- .../rebalancer/DelayedAutoRebalancer.java | 75 ++-- ...ractEvenDistributionRebalanceStrategy.java | 5 +- .../strategy/ConstraintRebalanceStrategy.java | 8 +- .../strategy/CrushRebalanceStrategy.java | 2 +- .../MultiRoundCrushRebalanceStrategy.java | 2 +- .../rebalancer/util/DelayedRebalanceUtil.java | 94 +---- .../waged/GlobalRebalanceRunner.java | 23 +- .../rebalancer/waged/WagedRebalancer.java | 61 +-- .../AbstractPartitionMovementConstraint.java | 6 +- .../BaselineInfluenceConstraint.java | 2 +- .../constraints/ConstraintBasedAlgorithm.java | 10 +- .../PartitionMovementConstraint.java | 6 +- .../waged/model/ClusterModelProvider.java | 107 ++++- .../stages/BestPossibleStateCalcStage.java | 65 +++- .../stages/IntermediateStateCalcStage.java | 10 +- .../stages/MaintenanceRecoveryStage.java | 2 +- .../stages/MessageGenerationPhase.java | 3 +- .../stages/ReadClusterDataStage.java | 2 - .../stages/ResourceComputationStage.java | 2 - .../stages/task/TaskSchedulingStage.java | 1 - .../apache/helix/manager/zk/ZKHelixAdmin.java | 14 +- .../helix/model/ResourceAssignment.java | 1 - .../helix/task/AbstractTaskDispatcher.java | 3 +- .../org/apache/helix/task/JobDispatcher.java | 6 +- .../tools/ClusterExternalViewVerifier.java | 2 +- .../StrictMatchExternalViewVerifier.java | 12 +- .../java/org/apache/helix/util/HelixUtil.java | 6 +- .../trimmer/TestHelixPropoertyTimmer.java | 4 +- .../rebalancer/TestAutoRebalanceStrategy.java | 2 +- .../rebalancer/waged/TestWagedRebalancer.java | 43 ++- .../waged/TestWagedRebalancerMetrics.java | 9 +- .../TestPartitionMovementConstraint.java | 5 + .../waged/model/AbstractTestClusterModel.java | 5 +- 
.../waged/model/ClusterModelTestHelper.java | 2 +- .../waged/model/TestAssignableNode.java | 22 +- .../waged/model/TestClusterModelProvider.java | 31 +- ...estBestPossibleCalcStageCompatibility.java | 3 +- .../TestBestPossibleStateCalcStage.java | 1 + .../TestCancellationMessageGeneration.java | 1 - .../TestIntermediateStateCalcStage.java | 1 + .../stages/TestRebalancePipeline.java | 26 +- .../stages/TestReplicaLevelThrottling.java | 2 +- ...flineNodeTimeoutDuringMaintenanceMode.java | 12 +- .../messaging/TestP2PMessageSemiAuto.java | 2 +- .../messaging/TestP2PNoDuplicatedMessage.java | 4 +- .../rebalancer/TestAutoRebalance.java | 6 +- .../TestAutoRebalancePartitionLimit.java | 2 +- .../rebalancer/TestCustomRebalancer.java | 2 +- .../TestCustomizedIdealStateRebalancer.java | 8 +- .../rebalancer/TestInstanceOperation.java | 54 +-- .../WagedRebalancer/TestWagedNodeSwap.java | 2 +- .../messaging/p2pMessage/TestP2PMessages.java | 8 +- .../task/TestTargetedTaskStateChange.java | 8 +- .../helix/rest/server/AbstractTestClass.java | 2 +- .../rest/server/TestInstancesAccessor.java | 2 +- 62 files changed, 757 insertions(+), 504 deletions(-) diff --git a/helix-core/src/main/java/org/apache/helix/controller/changedetector/ResourceChangeDetector.java b/helix-core/src/main/java/org/apache/helix/controller/changedetector/ResourceChangeDetector.java index ae8253ffbd..4e0b706b8f 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/changedetector/ResourceChangeDetector.java +++ b/helix-core/src/main/java/org/apache/helix/controller/changedetector/ResourceChangeDetector.java @@ -113,13 +113,13 @@ private void clearCachedComputation() { HelixConstants.ChangeType changeType, ResourceChangeSnapshot snapshot) { switch (changeType) { case INSTANCE_CONFIG: - return snapshot.getInstanceConfigMap(); + return snapshot.getAssignableInstanceConfigMap(); case IDEAL_STATE: return snapshot.getIdealStateMap(); case RESOURCE_CONFIG: return snapshot.getResourceConfigMap(); case 
LIVE_INSTANCE: - return snapshot.getLiveInstances(); + return snapshot.getAssignableLiveInstances(); case CLUSTER_CONFIG: ClusterConfig config = snapshot.getClusterConfig(); if (config == null) { diff --git a/helix-core/src/main/java/org/apache/helix/controller/changedetector/ResourceChangeSnapshot.java b/helix-core/src/main/java/org/apache/helix/controller/changedetector/ResourceChangeSnapshot.java index 39fccd8d0f..f37cae635f 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/changedetector/ResourceChangeSnapshot.java +++ b/helix-core/src/main/java/org/apache/helix/controller/changedetector/ResourceChangeSnapshot.java @@ -50,10 +50,12 @@ class ResourceChangeSnapshot { private Set _changedTypes; - private Map _instanceConfigMap; + private Map _allInstanceConfigMap; + private Map _assignableInstanceConfigMap; private Map _idealStateMap; private Map _resourceConfigMap; - private Map _liveInstances; + private Map _allLiveInstances; + private Map _assignableLiveInstances; private ClusterConfig _clusterConfig; /** @@ -61,10 +63,12 @@ class ResourceChangeSnapshot { */ ResourceChangeSnapshot() { _changedTypes = new HashSet<>(); - _instanceConfigMap = new HashMap<>(); + _allInstanceConfigMap = new HashMap<>(); + _assignableInstanceConfigMap = new HashMap<>(); _idealStateMap = new HashMap<>(); _resourceConfigMap = new HashMap<>(); - _liveInstances = new HashMap<>(); + _allLiveInstances = new HashMap<>(); + _assignableLiveInstances = new HashMap<>(); _clusterConfig = null; } @@ -80,12 +84,16 @@ class ResourceChangeSnapshot { ResourceChangeSnapshot(ResourceControllerDataProvider dataProvider, boolean ignoreNonTopologyChange) { _changedTypes = new HashSet<>(dataProvider.getRefreshedChangeTypes()); - - _instanceConfigMap = ignoreNonTopologyChange ? + _allInstanceConfigMap = ignoreNonTopologyChange ? 
dataProvider.getInstanceConfigMap().entrySet().parallelStream().collect(Collectors .toMap(e -> e.getKey(), e -> InstanceConfigTrimmer.getInstance().trimProperty(e.getValue()))) : new HashMap<>(dataProvider.getInstanceConfigMap()); + _assignableInstanceConfigMap = ignoreNonTopologyChange ? + dataProvider.getAssignableInstanceConfigMap().entrySet().parallelStream().collect(Collectors + .toMap(e -> e.getKey(), + e -> InstanceConfigTrimmer.getInstance().trimProperty(e.getValue()))) : + new HashMap<>(dataProvider.getAssignableInstanceConfigMap()); _idealStateMap = ignoreNonTopologyChange ? dataProvider.getIdealStates().entrySet().parallelStream().collect(Collectors .toMap(e -> e.getKey(), @@ -99,7 +107,8 @@ class ResourceChangeSnapshot { _clusterConfig = ignoreNonTopologyChange ? ClusterConfigTrimmer.getInstance().trimProperty(dataProvider.getClusterConfig()) : dataProvider.getClusterConfig(); - _liveInstances = new HashMap<>(dataProvider.getLiveInstances()); + _allLiveInstances = new HashMap<>(dataProvider.getLiveInstances()); + _assignableLiveInstances = new HashMap<>(dataProvider.getAssignableLiveInstances()); } /** @@ -108,10 +117,12 @@ class ResourceChangeSnapshot { */ ResourceChangeSnapshot(ResourceChangeSnapshot snapshot) { _changedTypes = new HashSet<>(snapshot._changedTypes); - _instanceConfigMap = new HashMap<>(snapshot._instanceConfigMap); + _allInstanceConfigMap = new HashMap<>(snapshot._allInstanceConfigMap); + _assignableInstanceConfigMap = new HashMap<>(snapshot._assignableInstanceConfigMap); _idealStateMap = new HashMap<>(snapshot._idealStateMap); _resourceConfigMap = new HashMap<>(snapshot._resourceConfigMap); - _liveInstances = new HashMap<>(snapshot._liveInstances); + _allLiveInstances = new HashMap<>(snapshot._allLiveInstances); + _assignableLiveInstances = new HashMap<>(snapshot._assignableLiveInstances); _clusterConfig = snapshot._clusterConfig; } @@ -120,7 +131,11 @@ Set getChangedTypes() { } Map getInstanceConfigMap() { - return 
_instanceConfigMap; + return _allInstanceConfigMap; + } + + Map getAssignableInstanceConfigMap() { + return _assignableInstanceConfigMap; } Map getIdealStateMap() { @@ -132,7 +147,11 @@ Map getResourceConfigMap() { } Map getLiveInstances() { - return _liveInstances; + return _allLiveInstances; + } + + Map getAssignableLiveInstances() { + return _assignableLiveInstances; } ClusterConfig getClusterConfig() { diff --git a/helix-core/src/main/java/org/apache/helix/controller/dataproviders/BaseControllerDataProvider.java b/helix-core/src/main/java/org/apache/helix/controller/dataproviders/BaseControllerDataProvider.java index 9dd5173841..9120bd9622 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/dataproviders/BaseControllerDataProvider.java +++ b/helix-core/src/main/java/org/apache/helix/controller/dataproviders/BaseControllerDataProvider.java @@ -102,8 +102,8 @@ public class BaseControllerDataProvider implements ControlContextProvider { // Property caches private final PropertyCache _resourceConfigCache; - private final PropertyCache _instanceConfigCache; - private final PropertyCache _liveInstanceCache; + private final PropertyCache _allInstanceConfigCache; + private final PropertyCache _allLiveInstanceCache; private final PropertyCache _idealStateCache; private final PropertyCache _clusterConstraintsCache; private final PropertyCache _stateModelDefinitionCache; @@ -118,6 +118,12 @@ public class BaseControllerDataProvider implements ControlContextProvider { private Map> _idealStateRuleMap; private final Map>> _disabledInstanceForPartitionMap = new HashMap<>(); private final Set _disabledInstanceSet = new HashSet<>(); + + // Assignable instances are instances will contain at most one instance with a given logicalId. + // This is used for SWAP related operations where there can be two instances with the same logicalId. 
+ private final Map _assignableInstanceConfigMap = new HashMap<>(); + private final Map _assignableLiveInstancesMap = new HashMap<>(); + private final Set _assignableDisabledInstanceSet = new HashSet<>(); private final Map _swapOutInstanceNameToSwapInInstanceName = new HashMap<>(); private final Set _enabledLiveSwapInInstanceNames = new HashSet<>(); private final Map _abnormalStateResolverMap = new HashMap<>(); @@ -154,7 +160,7 @@ public String getObjName(ResourceConfig obj) { return obj.getResourceName(); } }, true); - _liveInstanceCache = new PropertyCache<>(this, "LiveInstance", new PropertyCache.PropertyCacheKeyFuncs() { + _allLiveInstanceCache = new PropertyCache<>(this, "LiveInstance", new PropertyCache.PropertyCacheKeyFuncs() { @Override public PropertyKey getRootKey(HelixDataAccessor accessor) { return accessor.keyBuilder().liveInstances(); @@ -170,7 +176,7 @@ public String getObjName(LiveInstance obj) { return obj.getInstanceName(); } }, true); - _instanceConfigCache = new PropertyCache<>(this, "InstanceConfig", new PropertyCache.PropertyCacheKeyFuncs() { + _allInstanceConfigCache = new PropertyCache<>(this, "InstanceConfig", new PropertyCache.PropertyCacheKeyFuncs() { @Override public PropertyKey getRootKey(HelixDataAccessor accessor) { return accessor.keyBuilder().instanceConfigs(); @@ -310,7 +316,7 @@ private void refreshIdealState(final HelixDataAccessor accessor, private void refreshLiveInstances(final HelixDataAccessor accessor, Set refreshedType) { if (_propertyDataChangedMap.get(HelixConstants.ChangeType.LIVE_INSTANCE).getAndSet(false)) { - _liveInstanceCache.refresh(accessor); + _allLiveInstanceCache.refresh(accessor); _updateInstanceOfflineTime = true; refreshedType.add(HelixConstants.ChangeType.LIVE_INSTANCE); } else { @@ -323,10 +329,10 @@ private void refreshLiveInstances(final HelixDataAccessor accessor, private void refreshInstanceConfigs(final HelixDataAccessor accessor, Set refreshedType) { if 
(_propertyDataChangedMap.get(HelixConstants.ChangeType.INSTANCE_CONFIG).getAndSet(false)) { - _instanceConfigCache.refresh(accessor); + _allInstanceConfigCache.refresh(accessor); LogUtil.logInfo(logger, getClusterEventId(), String .format("Reloaded InstanceConfig for cluster %s, %s pipeline. Keys: %s", _clusterName, - getPipelineName(), _instanceConfigCache.getPropertyMap().keySet())); + getPipelineName(), _allInstanceConfigCache.getPropertyMap().keySet())); refreshedType.add(HelixConstants.ChangeType.INSTANCE_CONFIG); } else { LogUtil.logInfo(logger, getClusterEventId(), String @@ -335,6 +341,108 @@ private void refreshInstanceConfigs(final HelixDataAccessor accessor, } } + /** + * Refreshes the assignable instances and SWAP related caches. This should be called after + * liveInstance and instanceConfig caches are refreshed. To determine what instances are + * assignable and live, it takes a combination of both the all instanceConfigs and liveInstances. + * TODO: Add EVACUATE InstanceOperation to be filtered out in assignable nodes. 
+ * + * @param instanceConfigMap InstanceConfig map from instanceConfig cache + * @param liveInstancesMap LiveInstance map from liveInstance cache + * @param clusterConfig ClusterConfig from clusterConfig cache + */ + private void updateInstanceSets(Map instanceConfigMap, + Map liveInstancesMap, ClusterConfig clusterConfig) { + + if (clusterConfig == null) { + logger.warn("Skip refreshing swapping instances because clusterConfig is null."); + return; + } + + ClusterTopologyConfig clusterTopologyConfig = + ClusterTopologyConfig.createFromClusterConfig(clusterConfig); + + // Clear all caches + _assignableInstanceConfigMap.clear(); + _assignableLiveInstancesMap.clear(); + _swapOutInstanceNameToSwapInInstanceName.clear(); + _enabledLiveSwapInInstanceNames.clear(); + + Map filteredInstancesByLogicalId = new HashMap<>(); + Map swapOutLogicalIdsByInstanceName = new HashMap<>(); + Map swapInInstancesByLogicalId = new HashMap<>(); + + for (Map.Entry entry : instanceConfigMap.entrySet()) { + String node = entry.getKey(); + InstanceConfig currentInstanceConfig = entry.getValue(); + + if (currentInstanceConfig == null) { + continue; + } + + String currentInstanceLogicalId = + currentInstanceConfig.getLogicalId(clusterTopologyConfig.getEndNodeType()); + + // Filter out instances with duplicate logical IDs. If there are duplicates, the instance with + // InstanceOperation SWAP_OUT will be chosen over the instance with SWAP_IN. SWAP_IN is not + // assignable. If there are duplicates with one node having no InstanceOperation and the other + // having SWAP_OUT, the node with no InstanceOperation will be chosen. This signifies SWAP + // completion, therefore making the node assignable. 
+ if (filteredInstancesByLogicalId.containsKey(currentInstanceLogicalId)) { + String filteredNode = filteredInstancesByLogicalId.get(currentInstanceLogicalId); + InstanceConfig filteredDuplicateInstanceConfig = instanceConfigMap.get(filteredNode); + + if ((filteredDuplicateInstanceConfig.getInstanceOperation() + .equals(InstanceConstants.InstanceOperation.SWAP_IN.name()) + && currentInstanceConfig.getInstanceOperation() + .equals(InstanceConstants.InstanceOperation.SWAP_OUT.name())) + || currentInstanceConfig.getInstanceOperation().isEmpty()) { + // If the already filtered instance is SWAP_IN and this instance is in SWAP_OUT, then replace the filtered + // instance with this instance. If this instance has no InstanceOperation, then replace the filtered instance + // with this instance. This is the case where the SWAP_IN node has been marked as complete or SWAP_IN exists and + // SWAP_OUT does not. There can never be a case where both have no InstanceOperation set. + _assignableInstanceConfigMap.remove(filteredNode); + _assignableInstanceConfigMap.put(node, currentInstanceConfig); + filteredInstancesByLogicalId.put(currentInstanceLogicalId, node); + } + } else { + _assignableInstanceConfigMap.put(node, currentInstanceConfig); + filteredInstancesByLogicalId.put(currentInstanceLogicalId, node); + } + + if (currentInstanceConfig.getInstanceOperation() + .equals(InstanceConstants.InstanceOperation.SWAP_OUT.name())) { + swapOutLogicalIdsByInstanceName.put(currentInstanceConfig.getInstanceName(), + currentInstanceLogicalId); + } + + if (currentInstanceConfig.getInstanceOperation() + .equals(InstanceConstants.InstanceOperation.SWAP_IN.name())) { + swapInInstancesByLogicalId.put( + currentInstanceConfig.getLogicalId(clusterTopologyConfig.getEndNodeType()), + currentInstanceConfig.getInstanceName()); + } + } + + liveInstancesMap.forEach((instanceName, liveInstance) -> { + if (_assignableInstanceConfigMap.containsKey(instanceName)) { + 
_assignableLiveInstancesMap.put(instanceName, liveInstance); + } + }); + + swapOutLogicalIdsByInstanceName.forEach((swapOutInstanceName, value) -> { + String swapInInstanceName = swapInInstancesByLogicalId.get(value); + if (swapInInstanceName != null) { + _swapOutInstanceNameToSwapInInstanceName.put(swapOutInstanceName, swapInInstanceName); + if (liveInstancesMap.containsKey(swapInInstanceName) + && InstanceValidationUtil.isInstanceEnabled(instanceConfigMap.get(swapInInstanceName), + clusterConfig)) { + _enabledLiveSwapInInstanceNames.add(swapInInstanceName); + } + } + }); + } + private void refreshResourceConfig(final HelixDataAccessor accessor, Set refreshedType) { if (_propertyDataChangedMap.get(HelixConstants.ChangeType.RESOURCE_CONFIG).getAndSet(false)) { @@ -373,7 +481,7 @@ private void timeoutNodesDuringMaintenance(final HelixDataAccessor accessor, Clu timeOutWindow = clusterConfig.getOfflineNodeTimeOutForMaintenanceMode(); } if (timeOutWindow >= 0 && isMaintenanceModeEnabled) { - for (String instance : _liveInstanceCache.getPropertyMap().keySet()) { + for (String instance : _assignableLiveInstancesMap.keySet()) { // 1. Check timed-out cache and don't do repeated work; // 2. Check for nodes that didn't exist in the last iteration, because it has been checked; // 3. For all other nodes, check if it's timed-out. 
@@ -386,9 +494,8 @@ && isInstanceTimedOutDuringMaintenance(accessor, instance, timeOutWindow)) { } } if (isMaintenanceModeEnabled) { - _liveInstanceExcludeTimedOutForMaintenance = - _liveInstanceCache.getPropertyMap().entrySet().stream() - .filter(e -> !_timedOutInstanceDuringMaintenance.contains(e.getKey())) + _liveInstanceExcludeTimedOutForMaintenance = _assignableLiveInstancesMap.entrySet().stream() + .filter(e -> !_timedOutInstanceDuringMaintenance.contains(e.getKey())) .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); } } @@ -421,6 +528,8 @@ protected synchronized Set doRefresh(HelixDataAccesso refreshIdealState(accessor, refreshedTypes); refreshLiveInstances(accessor, refreshedTypes); refreshInstanceConfigs(accessor, refreshedTypes); + updateInstanceSets(_allInstanceConfigCache.getPropertyMap(), _allLiveInstanceCache.getPropertyMap(), + _clusterConfig); refreshResourceConfig(accessor, refreshedTypes); _stateModelDefinitionCache.refresh(accessor); _clusterConstraintsCache.refresh(accessor); @@ -431,17 +540,19 @@ protected synchronized Set doRefresh(HelixDataAccesso updateOfflineInstanceHistory(accessor); // Refresh derived data - _instanceMessagesCache.refresh(accessor, _liveInstanceCache.getPropertyMap()); - _currentStateCache.refresh(accessor, _liveInstanceCache.getPropertyMap()); + // Must use _liveInstanceCache instead of _assignableLiveInstancesMap because we need to + // know about the messages and current state of all instances including the SWAP_IN ones. + _instanceMessagesCache.refresh(accessor, _allLiveInstanceCache.getPropertyMap()); + _currentStateCache.refresh(accessor, _allLiveInstanceCache.getPropertyMap()); // current state must be refreshed before refreshing relay messages // because we need to use current state to validate all relay messages. 
- _instanceMessagesCache.updateRelayMessages(_liveInstanceCache.getPropertyMap(), + _instanceMessagesCache.updateRelayMessages(_allLiveInstanceCache.getPropertyMap(), _currentStateCache.getParticipantStatesMap()); updateIdealRuleMap(getClusterConfig()); - updateDisabledInstances(getInstanceConfigMap().values(), getClusterConfig()); - updateSwappingInstances(getInstanceConfigMap().values(), getEnabledLiveInstances(), + updateDisabledInstances(getInstanceConfigMap().values(), + getAssignableInstanceConfigMap().values(), getClusterConfig()); return refreshedTypes; @@ -453,17 +564,18 @@ protected void dumpDebugInfo() { "# of StateModelDefinition read from zk: " + getStateModelDefMap().size()); LogUtil.logDebug(logger, getClusterEventId(), "# of ConstraintMap read from zk: " + getConstraintMap().size()); - LogUtil - .logDebug(logger, getClusterEventId(), "LiveInstances: " + getLiveInstances().keySet()); - for (LiveInstance instance : getLiveInstances().values()) { + LogUtil.logDebug(logger, getClusterEventId(), + "AssignableLiveInstances: " + getAssignableLiveInstances().keySet()); + for (LiveInstance instance : getAssignableLiveInstances().values()) { LogUtil.logDebug(logger, getClusterEventId(), - "live instance: " + instance.getInstanceName() + " " + instance.getEphemeralOwner()); + "assignable live instance: " + instance.getInstanceName() + " " + + instance.getEphemeralOwner()); } LogUtil.logDebug(logger, getClusterEventId(), "IdealStates: " + getIdealStates().keySet()); LogUtil.logDebug(logger, getClusterEventId(), "ResourceConfigs: " + getResourceConfigMap().keySet()); LogUtil.logDebug(logger, getClusterEventId(), - "InstanceConfigs: " + getInstanceConfigMap().keySet()); + "AssignableInstanceConfigs: " + getAssignableInstanceConfigMap().keySet()); LogUtil.logDebug(logger, getClusterEventId(), "ClusterConfigs: " + getClusterConfig()); } } @@ -476,8 +588,10 @@ public void setClusterConfig(ClusterConfig clusterConfig) { _clusterConfig = clusterConfig; 
refreshAbnormalStateResolverMap(_clusterConfig); updateIdealRuleMap(_clusterConfig); - updateDisabledInstances(getInstanceConfigMap().values(), _clusterConfig); - updateSwappingInstances(getInstanceConfigMap().values(), getEnabledLiveInstances(), + updateInstanceSets(_allInstanceConfigCache.getPropertyMap(), _allLiveInstanceCache.getPropertyMap(), + _clusterConfig); + updateDisabledInstances(getInstanceConfigMap().values(), + getAssignableInstanceConfigMap().values(), _clusterConfig); } @@ -527,23 +641,57 @@ public Map> getIdealStateRules() { } /** - * Returns the LiveInstances for each of the instances that are currently up and running, + * Returns the assignable LiveInstances for each of the instances that are currently up and running, * excluding the instances that are considered offline during maintenance mode. Instances * are timed-out if they have been offline for a while before going live during maintenance mode. + * @return A map of LiveInstances to their instance names */ - public Map getLiveInstances() { + public Map getAssignableLiveInstances() { if (isMaintenanceModeEnabled()) { - return _liveInstanceExcludeTimedOutForMaintenance; + return Collections.unmodifiableMap(_liveInstanceExcludeTimedOutForMaintenance); } - return _liveInstanceCache.getPropertyMap(); + return Collections.unmodifiableMap(_assignableLiveInstancesMap); + } + + /** + * Returns the LiveInstances for each of the instances that are currently up and running, + * excluding the instances that are considered offline during maintenance mode. Instances are + * timed-out if they have been offline for a while before going live during maintenance mode. + * + * @return A map of LiveInstances to their instance names + */ + public Map getLiveInstances() { + return _allLiveInstanceCache.getPropertyMap(); + } + + /** + * Return the set of all assignable instances names. 
+ * + * @return A new set contains instance name + */ + public Set getAssignableInstances() { + return _assignableInstanceConfigMap.keySet(); } /** * Return the set of all instances names. + * @return A new set contains instance name */ public Set getAllInstances() { - return _instanceConfigCache.getPropertyMap().keySet(); + return _allInstanceConfigCache.getPropertyMap().keySet(); + } + + /** + * Return all the live nodes that are enabled and assignable + * + * @return A new set contains live instance name and that are marked enabled + */ + public Set getAssignableEnabledLiveInstances() { + Set enabledLiveInstances = new HashSet<>(getAssignableLiveInstances().keySet()); + enabledLiveInstances.removeAll(getDisabledInstances()); + + return enabledLiveInstances; } /** @@ -552,22 +700,50 @@ public Set getAllInstances() { */ public Set getEnabledLiveInstances() { Set enabledLiveInstances = new HashSet<>(getLiveInstances().keySet()); - enabledLiveInstances.removeAll(getDisabledInstances()); + enabledLiveInstances.removeAll(getAssignableDisabledInstances()); return enabledLiveInstances; } + /** + * Return all nodes that are enabled and assignable. + * + * @return A new set contains instance name and that are marked enabled + */ + public Set getAssignableEnabledInstances() { + Set enabledNodes = new HashSet<>(getAssignableInstances()); + enabledNodes.removeAll(getDisabledInstances()); + + return enabledNodes; + } + /** * Return all nodes that are enabled. - * @return + * @return A new set contains instance name and that are marked enabled */ public Set getEnabledInstances() { Set enabledNodes = new HashSet<>(getAllInstances()); - enabledNodes.removeAll(getDisabledInstances()); + enabledNodes.removeAll(getAssignableDisabledInstances()); return enabledNodes; } + /** + * Return all the live nodes that are enabled and assignable and tagged with given instanceTag. + * + * @param instanceTag The instance group tag. 
+ * @return A new set contains live instance name and that are marked enabled and have the + * specified tag. + */ + public Set getAssignableEnabledLiveInstancesWithTag(String instanceTag) { + Set enabledLiveInstancesWithTag = new HashSet<>(getAssignableLiveInstances().keySet()); + Set instancesWithTag = getAssignableInstancesWithTag(instanceTag); + enabledLiveInstancesWithTag.retainAll(instancesWithTag); + enabledLiveInstancesWithTag.removeAll(getDisabledInstances()); + + return enabledLiveInstancesWithTag; + } + /** * Return all the live nodes that are enabled and tagged with given instanceTag. * @param instanceTag The instance group tag. @@ -576,21 +752,38 @@ public Set getEnabledInstances() { */ public Set getEnabledLiveInstancesWithTag(String instanceTag) { Set enabledLiveInstancesWithTag = new HashSet<>(getLiveInstances().keySet()); - Set instancesWithTag = getInstancesWithTag(instanceTag); + Set instancesWithTag = getAssignableInstancesWithTag(instanceTag); enabledLiveInstancesWithTag.retainAll(instancesWithTag); enabledLiveInstancesWithTag.removeAll(getDisabledInstances()); return enabledLiveInstancesWithTag; } + /** + * Return all the nodes that are assignable and tagged with given instance tag. + * + * @param instanceTag The instance group tag. + */ + public Set getAssignableInstancesWithTag(String instanceTag) { + Set taggedInstances = new HashSet<>(); + for (String instance : _assignableInstanceConfigMap.keySet()) { + InstanceConfig instanceConfig = _allInstanceConfigCache.getPropertyByName(instance); + if (instanceConfig != null && instanceConfig.containsTag(instanceTag)) { + taggedInstances.add(instance); + } + } + + return taggedInstances; + } + /** * Return all the nodes that are tagged with given instance tag. * @param instanceTag The instance group tag. 
*/ public Set getInstancesWithTag(String instanceTag) { Set taggedInstances = new HashSet<>(); - for (String instance : _instanceConfigCache.getPropertyMap().keySet()) { - InstanceConfig instanceConfig = _instanceConfigCache.getPropertyByName(instance); + for (String instance : _assignableInstanceConfigMap.keySet()) { + InstanceConfig instanceConfig = _allInstanceConfigCache.getPropertyByName(instance); if (instanceConfig != null && instanceConfig.containsTag(instanceTag)) { taggedInstances.add(instance); } @@ -625,6 +818,15 @@ public Set getDisabledInstances() { return Collections.unmodifiableSet(_disabledInstanceSet); } + /** + * This method allows one to fetch the set of nodes that are disabled + * + * @return + */ + public Set getAssignableDisabledInstances() { + return Collections.unmodifiableSet(_assignableDisabledInstanceSet); + } + /** * Get all swapping instance pairs. * @@ -644,7 +846,9 @@ public Set getEnabledLiveSwapInInstanceNames() { } public synchronized void setLiveInstances(List liveInstances) { - _liveInstanceCache.setPropertyMap(HelixProperty.convertListToMap(liveInstances)); + _allLiveInstanceCache.setPropertyMap(HelixProperty.convertListToMap(liveInstances)); + updateInstanceSets(_allInstanceConfigCache.getPropertyMap(), _allLiveInstanceCache.getPropertyMap(), + _clusterConfig); _updateInstanceOfflineTime = true; } @@ -762,11 +966,20 @@ public IdealState getIdealState(String resourceName) { } /** - * Returns the instance config map - * @return + * Returns the instance config map for all assignable instances. + * + * @return a map of instance name to instance config + */ + public Map getAssignableInstanceConfigMap() { + return Collections.unmodifiableMap(_assignableInstanceConfigMap); + } + + /** + * Returns the instance config map for all assignable instances. 
+ * @return a map of instance name to instance config */ public Map getInstanceConfigMap() { - return _instanceConfigCache.getPropertyMap(); + return _allInstanceConfigCache.getPropertyMap(); } /** @@ -774,9 +987,11 @@ public Map getInstanceConfigMap() { * @param instanceConfigMap */ public void setInstanceConfigMap(Map instanceConfigMap) { - _instanceConfigCache.setPropertyMap(instanceConfigMap); - updateDisabledInstances(instanceConfigMap.values(), getClusterConfig()); - updateSwappingInstances(instanceConfigMap.values(), getEnabledLiveInstances(), + _allInstanceConfigCache.setPropertyMap(instanceConfigMap); + updateInstanceSets(_allInstanceConfigCache.getPropertyMap(), _allLiveInstanceCache.getPropertyMap(), + getClusterConfig()); + updateDisabledInstances(getInstanceConfigMap().values(), + getAssignableInstanceConfigMap().values(), getClusterConfig()); } @@ -839,8 +1054,8 @@ private void updateOfflineInstanceHistory(HelixDataAccessor accessor) { if (!_updateInstanceOfflineTime) { return; } - List offlineNodes = new ArrayList<>(_instanceConfigCache.getPropertyMap().keySet()); - offlineNodes.removeAll(_liveInstanceCache.getPropertyMap().keySet()); + List offlineNodes = new ArrayList<>(_allInstanceConfigCache.getPropertyMap().keySet()); + offlineNodes.removeAll(_allLiveInstanceCache.getPropertyMap().keySet()); _instanceOfflineTimeMap = new HashMap<>(); for (String instance : offlineNodes) { @@ -866,15 +1081,18 @@ private void updateOfflineInstanceHistory(HelixDataAccessor accessor) { _updateInstanceOfflineTime = false; } - private void updateDisabledInstances(Collection instanceConfigs, - ClusterConfig clusterConfig) { + private void updateDisabledInstances(Collection allInstanceConfigs, + Collection assignableInstanceConfigs, ClusterConfig clusterConfig) { // Move the calculating disabled instances to refresh _disabledInstanceForPartitionMap.clear(); _disabledInstanceSet.clear(); - for (InstanceConfig config : instanceConfigs) { + for (InstanceConfig config : 
allInstanceConfigs) { Map> disabledPartitionMap = config.getDisabledPartitionsMap(); if (!InstanceValidationUtil.isInstanceEnabled(config, clusterConfig)) { _disabledInstanceSet.add(config.getInstanceName()); + if (assignableInstanceConfigs.contains(config)) { + _assignableDisabledInstanceSet.add(config.getInstanceName()); + } } for (String resource : disabledPartitionMap.keySet()) { _disabledInstanceForPartitionMap.putIfAbsent(resource, new HashMap<>()); @@ -886,49 +1104,6 @@ private void updateDisabledInstances(Collection instanceConfigs, } } - private void updateSwappingInstances(Collection instanceConfigs, - Set liveEnabledInstances, ClusterConfig clusterConfig) { - _swapOutInstanceNameToSwapInInstanceName.clear(); - _enabledLiveSwapInInstanceNames.clear(); - - if (clusterConfig == null) { - logger.warn("Skip refreshing swapping instances because clusterConfig is null."); - return; - } - - ClusterTopologyConfig clusterTopologyConfig = - ClusterTopologyConfig.createFromClusterConfig(clusterConfig); - - Map swapOutLogicalIdsByInstanceName = new HashMap<>(); - Map swapInInstancesByLogicalId = new HashMap<>(); - instanceConfigs.forEach(instanceConfig -> { - if (instanceConfig == null) { - return; - } - if (instanceConfig.getInstanceOperation() - .equals(InstanceConstants.InstanceOperation.SWAP_OUT.name())) { - swapOutLogicalIdsByInstanceName.put(instanceConfig.getInstanceName(), - instanceConfig.getLogicalId(clusterTopologyConfig.getEndNodeType())); - } - if (instanceConfig.getInstanceOperation() - .equals(InstanceConstants.InstanceOperation.SWAP_IN.name())) { - swapInInstancesByLogicalId.put( - instanceConfig.getLogicalId(clusterTopologyConfig.getEndNodeType()), - instanceConfig.getInstanceName()); - } - }); - - swapOutLogicalIdsByInstanceName.forEach((swapOutInstanceName, value) -> { - String swapInInstanceName = swapInInstancesByLogicalId.get(value); - if (swapInInstanceName != null) { - _swapOutInstanceNameToSwapInInstanceName.put(swapOutInstanceName, 
swapInInstanceName); - if (liveEnabledInstances.contains(swapInInstanceName)) { - _enabledLiveSwapInInstanceNames.add(swapInInstanceName); - } - } - }); - } - /* * Check if the instance is timed-out during maintenance mode. An instance is timed-out if it has * been offline for longer than the user defined timeout window. @@ -1101,10 +1276,16 @@ public PauseSignal getPauseSignal() { protected StringBuilder genCacheContentStringBuilder() { StringBuilder sb = new StringBuilder(); - sb.append(String.format("liveInstaceMap: %s", _liveInstanceCache.getPropertyMap())).append("\n"); + sb.append(String.format("liveInstaceMap: %s", _allLiveInstanceCache.getPropertyMap())) + .append("\n"); + sb.append(String.format("assignableLiveInstaceMap: %s", _assignableLiveInstancesMap)) + .append("\n"); sb.append(String.format("idealStateMap: %s", _idealStateCache.getPropertyMap())).append("\n"); sb.append(String.format("stateModelDefMap: %s", _stateModelDefinitionCache.getPropertyMap())).append("\n"); - sb.append(String.format("instanceConfigMap: %s", _instanceConfigCache.getPropertyMap())).append("\n"); + sb.append(String.format("instanceConfigMap: %s", _allInstanceConfigCache.getPropertyMap())) + .append("\n"); + sb.append(String.format("assignableInstanceConfigMap: %s", _assignableInstanceConfigMap)) + .append("\n"); sb.append(String.format("resourceConfigMap: %s", _resourceConfigCache.getPropertyMap())).append("\n"); sb.append(String.format("messageCache: %s", _instanceMessagesCache)).append("\n"); sb.append(String.format("currentStateCache: %s", _currentStateCache)).append("\n"); @@ -1113,7 +1294,7 @@ protected StringBuilder genCacheContentStringBuilder() { } protected PropertyCache getLiveInstanceCache() { - return _liveInstanceCache; + return _allLiveInstanceCache; } @Override diff --git a/helix-core/src/main/java/org/apache/helix/controller/dataproviders/WorkflowControllerDataProvider.java 
b/helix-core/src/main/java/org/apache/helix/controller/dataproviders/WorkflowControllerDataProvider.java index 96894e82df..3a71e777bf 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/dataproviders/WorkflowControllerDataProvider.java +++ b/helix-core/src/main/java/org/apache/helix/controller/dataproviders/WorkflowControllerDataProvider.java @@ -26,7 +26,6 @@ import org.apache.helix.HelixConstants; import org.apache.helix.HelixDataAccessor; -import org.apache.helix.common.caches.TaskCurrentStateCache; import org.apache.helix.model.CurrentState; import org.apache.helix.zookeeper.datamodel.ZNRecord; import org.apache.helix.common.caches.AbstractDataCache; @@ -164,7 +163,7 @@ public void setParticipantActiveTaskCount(String instance, int taskCount) { */ public void resetActiveTaskCount(CurrentStateOutput currentStateOutput) { // init participant map - for (String liveInstance : getLiveInstances().keySet()) { + for (String liveInstance : getAssignableLiveInstances().keySet()) { _participantActiveTaskCount.put(liveInstance, 0); } // Active task == init and running tasks diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/AbstractRebalancer.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/AbstractRebalancer.java index 1d8cb5d6c5..7a23b8f280 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/AbstractRebalancer.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/AbstractRebalancer.java @@ -102,9 +102,10 @@ public ResourceAssignment computeBestPossiblePartitionState(T cache, IdealState Set disabledInstancesForPartition = cache.getDisabledInstancesForPartition(resource.getResourceName(), partition.toString()); List preferenceList = getPreferenceList(partition, idealState, - Collections.unmodifiableSet(cache.getLiveInstances().keySet())); + Collections.unmodifiableSet(cache.getAssignableLiveInstances().keySet())); Map bestStateForPartition = - 
computeBestPossibleStateForPartition(cache.getLiveInstances().keySet(), stateModelDef, + computeBestPossibleStateForPartition(cache.getAssignableLiveInstances().keySet(), + stateModelDef, preferenceList, currentStateOutput, disabledInstancesForPartition, idealState, cache.getClusterConfig(), partition, cache.getAbnormalStateResolver(stateModelDefName), cache); @@ -392,6 +393,12 @@ protected Map computeBestPossibleMap(List preferenceList * transition to the top-state, which could minimize the impact to the application's availability. * To achieve that, we sort the preferenceList based on CurrentState, by treating top-state and * second-states with same priority and rely on the fact that Collections.sort() is stable. + * @param preferenceList List of instances the replica will be placed on + * @param stateModelDef State model definition + * @param currentStateMap Current state of each replica + * @param liveInstances Set of live instances + * @param disabledInstancesForPartition Set of disabled instances for the partition + * @param bestPossibleStateMap Output map of for the partition */ private void assignStatesToInstances(final List preferenceList, final StateModelDefinition stateModelDef, final Map currentStateMap, diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/AutoRebalancer.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/AutoRebalancer.java index 78bbbba280..5c3ff3b9e9 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/AutoRebalancer.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/AutoRebalancer.java @@ -72,15 +72,15 @@ public IdealState computeNewIdealState(String resourceName, LOG.error("State Model Definition null for resource: " + resourceName); throw new HelixException("State Model Definition null for resource: " + resourceName); } - Map liveInstance = clusterData.getLiveInstances(); - int replicas = 
currentIdealState.getReplicaCount(liveInstance.size()); + Map assignableLiveInstance = clusterData.getAssignableLiveInstances(); + int replicas = currentIdealState.getReplicaCount(assignableLiveInstance.size()); LinkedHashMap stateCountMap = stateModelDef - .getStateCountMap(liveInstance.size(), replicas); - List liveNodes = new ArrayList<>(liveInstance.keySet()); - List allNodes = new ArrayList<>(clusterData.getAllInstances()); - allNodes.removeAll(clusterData.getDisabledInstances()); - liveNodes.retainAll(allNodes); + .getStateCountMap(assignableLiveInstance.size(), replicas); + List assignableLiveNodes = new ArrayList<>(assignableLiveInstance.keySet()); + List assignableNodes = new ArrayList<>(clusterData.getAssignableInstances()); + assignableNodes.removeAll(clusterData.getDisabledInstances()); + assignableLiveNodes.retainAll(assignableNodes); Map> currentMapping = currentMapping(currentStateOutput, resourceName, partitions, stateCountMap); @@ -89,11 +89,11 @@ public IdealState computeNewIdealState(String resourceName, Set taggedNodes = new HashSet(); Set taggedLiveNodes = new HashSet(); if (currentIdealState.getInstanceGroupTag() != null) { - for (String instanceName : allNodes) { - if (clusterData.getInstanceConfigMap().get(instanceName) + for (String instanceName : assignableNodes) { + if (clusterData.getAssignableInstanceConfigMap().get(instanceName) .containsTag(currentIdealState.getInstanceGroupTag())) { taggedNodes.add(instanceName); - if (liveInstance.containsKey(instanceName)) { + if (assignableLiveInstance.containsKey(instanceName)) { taggedLiveNodes.add(instanceName); } } @@ -114,25 +114,25 @@ public IdealState computeNewIdealState(String resourceName, LOG.warn("Resource " + resourceName + " has tag " + currentIdealState.getInstanceGroupTag() + " but no live participants have this tag"); } - allNodes = new ArrayList<>(taggedNodes); - liveNodes = new ArrayList<>(taggedLiveNodes); + assignableNodes = new ArrayList<>(taggedNodes); + assignableLiveNodes 
= new ArrayList<>(taggedLiveNodes); } // sort node lists to ensure consistent preferred assignments - Collections.sort(allNodes); - Collections.sort(liveNodes); + Collections.sort(assignableNodes); + Collections.sort(assignableLiveNodes); int maxPartition = currentIdealState.getMaxPartitionsPerInstance(); _rebalanceStrategy = getRebalanceStrategy(currentIdealState.getRebalanceStrategy(), partitions, resourceName, stateCountMap, maxPartition); ZNRecord newMapping = _rebalanceStrategy - .computePartitionAssignment(allNodes, liveNodes, currentMapping, clusterData); + .computePartitionAssignment(assignableNodes, assignableLiveNodes, currentMapping, clusterData); LOG.debug("currentMapping: {}", currentMapping); LOG.debug("stateCountMap: {}", stateCountMap); - LOG.debug("liveNodes: {}", liveNodes); - LOG.debug("allNodes: {}", allNodes); + LOG.debug("assignableLiveNodes: {}", assignableLiveNodes); + LOG.debug("assignableNodes: {}", assignableNodes); LOG.debug("maxPartition: {}", maxPartition); LOG.debug("newMapping: {}", newMapping); diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/CustomRebalancer.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/CustomRebalancer.java index 7ed2e70b08..939d94aedf 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/CustomRebalancer.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/CustomRebalancer.java @@ -127,7 +127,7 @@ private Map computeCustomizedBestStateForPartition( return instanceStateMap; } - Map liveInstancesMap = cache.getLiveInstances(); + Map assignableLiveInstancesMap = cache.getAssignableLiveInstances(); for (String instance : idealStateMap.keySet()) { boolean notInErrorState = currentStateMap != null && !HelixDefinedState.ERROR.toString().equals(currentStateMap.get(instance)); @@ -135,7 +135,7 @@ private Map computeCustomizedBestStateForPartition( // Note: if instance is not live, the mapping for that instance will not show 
up in // BestPossibleMapping (and ExternalView) - if (liveInstancesMap.containsKey(instance) && notInErrorState) { + if (assignableLiveInstancesMap.containsKey(instance) && notInErrorState) { if (enabled) { instanceStateMap.put(instance, idealStateMap.get(instance)); } else { diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/DelayedAutoRebalancer.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/DelayedAutoRebalancer.java index 442ddfb029..78793ddd9d 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/DelayedAutoRebalancer.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/DelayedAutoRebalancer.java @@ -40,7 +40,6 @@ import org.apache.helix.controller.rebalancer.util.WagedValidationUtil; import org.apache.helix.controller.stages.CurrentStateOutput; import org.apache.helix.model.ClusterConfig; -import org.apache.helix.model.ClusterTopologyConfig; import org.apache.helix.model.IdealState; import org.apache.helix.model.Partition; import org.apache.helix.model.Resource; @@ -95,51 +94,46 @@ public IdealState computeNewIdealState(String resourceName, } } - Set liveEnabledNodes; - Set allNodes; + Set assignableLiveEnabledNodes; + Set assignableNodes; String instanceTag = currentIdealState.getInstanceGroupTag(); if (instanceTag != null) { - liveEnabledNodes = clusterData.getEnabledLiveInstancesWithTag(instanceTag); - allNodes = clusterData.getInstancesWithTag(instanceTag); + assignableLiveEnabledNodes = clusterData.getAssignableEnabledLiveInstancesWithTag(instanceTag); + assignableNodes = clusterData.getAssignableInstancesWithTag(instanceTag); if (LOG.isInfoEnabled()) { LOG.info(String.format( "Found the following participants with tag %s for %s: " + "instances: %s, liveEnabledInstances: %s", - currentIdealState.getInstanceGroupTag(), resourceName, allNodes, liveEnabledNodes)); + currentIdealState.getInstanceGroupTag(), resourceName, assignableNodes, 
assignableLiveEnabledNodes)); } } else { - liveEnabledNodes = clusterData.getEnabledLiveInstances(); - allNodes = clusterData.getAllInstances(); + assignableLiveEnabledNodes = clusterData.getAssignableEnabledLiveInstances(); + assignableNodes = clusterData.getAssignableInstances(); } - Set allNodesDeduped = DelayedRebalanceUtil.filterOutInstancesWithDuplicateLogicalIds( - ClusterTopologyConfig.createFromClusterConfig(clusterConfig), - clusterData.getInstanceConfigMap(), allNodes); - // Remove the non-selected instances with duplicate logicalIds from liveEnabledNodes - // This ensures the same duplicate instance is kept in both allNodesDeduped and liveEnabledNodes - liveEnabledNodes.retainAll(allNodesDeduped); - long delay = DelayedRebalanceUtil.getRebalanceDelay(currentIdealState, clusterConfig); - Set activeNodes = DelayedRebalanceUtil - .getActiveNodes(allNodesDeduped, currentIdealState, liveEnabledNodes, - clusterData.getInstanceOfflineTimeMap(), clusterData.getLiveInstances().keySet(), - clusterData.getInstanceConfigMap(), delay, clusterConfig); + Set activeNodes = + DelayedRebalanceUtil.getActiveNodes(assignableNodes, currentIdealState, assignableLiveEnabledNodes, + clusterData.getInstanceOfflineTimeMap(), + clusterData.getAssignableLiveInstances().keySet(), + clusterData.getAssignableInstanceConfigMap(), delay, clusterConfig); if (delayRebalanceEnabled) { Set offlineOrDisabledInstances = new HashSet<>(activeNodes); - offlineOrDisabledInstances.removeAll(liveEnabledNodes); + offlineOrDisabledInstances.removeAll(assignableLiveEnabledNodes); DelayedRebalanceUtil.setRebalanceScheduler(currentIdealState.getResourceName(), true, offlineOrDisabledInstances, clusterData.getInstanceOfflineTimeMap(), - clusterData.getLiveInstances().keySet(), clusterData.getInstanceConfigMap(), delay, + clusterData.getAssignableLiveInstances().keySet(), + clusterData.getAssignableInstanceConfigMap(), delay, clusterConfig, _manager); } - if (allNodesDeduped.isEmpty() || 
activeNodes.isEmpty()) { + if (assignableNodes.isEmpty() || activeNodes.isEmpty()) { LOG.error(String.format( "No instances or active instances available for resource %s, " - + "allInstances: %s, liveInstances: %s, activeInstances: %s", - resourceName, allNodesDeduped, liveEnabledNodes, activeNodes)); + + "allInstances: %s, liveInstances: %s, activeInstances: %s", resourceName, assignableNodes, + assignableLiveEnabledNodes, activeNodes)); return generateNewIdealState(resourceName, currentIdealState, emptyMapping(currentIdealState)); } @@ -165,14 +159,15 @@ public IdealState computeNewIdealState(String resourceName, getRebalanceStrategy(currentIdealState.getRebalanceStrategy(), allPartitions, resourceName, stateCountMap, maxPartition); - List allNodeList = new ArrayList<>(allNodesDeduped); + List allNodeList = new ArrayList<>(assignableNodes); // TODO: Currently we have 2 groups of instances and compute preference list twice and merge. // Eventually we want to have exclusive groups of instance for different instance tag. List liveEnabledAssignableNodeList = new ArrayList<>( // We will not assign partitions to instances with EVACUATE InstanceOperation. 
- DelayedRebalanceUtil.filterOutEvacuatingInstances(clusterData.getInstanceConfigMap(), - liveEnabledNodes)); + DelayedRebalanceUtil.filterOutEvacuatingInstances( + clusterData.getAssignableInstanceConfigMap(), + assignableLiveEnabledNodes)); // sort node lists to ensure consistent preferred assignments Collections.sort(allNodeList); Collections.sort(liveEnabledAssignableNodeList); @@ -194,29 +189,16 @@ public IdealState computeNewIdealState(String resourceName, _rebalanceStrategy.computePartitionAssignment(allNodeList, activeNodeList, currentMapping, clusterData); finalMapping = getFinalDelayedMapping(currentIdealState, newIdealMapping, newActiveMapping, - liveEnabledNodes, replicaCount, minActiveReplicas); + assignableLiveEnabledNodes, replicaCount, minActiveReplicas); } finalMapping.getListFields().putAll(userDefinedPreferenceList); - // 1. Get all SWAP_OUT instances and corresponding SWAP_IN instance pairs in the cluster. - Map swapOutToSwapInInstancePairs = - clusterData.getSwapOutToSwapInInstancePairs(); - // 2. Get all enabled and live SWAP_IN instances in the cluster. - Set enabledLiveSwapInInstances = clusterData.getEnabledLiveSwapInInstanceNames(); - // 3. For each SWAP_OUT instance in any of the preferenceLists, add the corresponding SWAP_IN instance to the end. - // Skipping this when there are not SWAP_IN instances ready(enabled and live) will reduce computation time when there is not an active - // swap occurring. 
- if (!clusterData.getEnabledLiveSwapInInstanceNames().isEmpty()) { - DelayedRebalanceUtil.addSwapInInstanceToPreferenceListsIfSwapOutInstanceExists(finalMapping, - swapOutToSwapInInstancePairs, enabledLiveSwapInInstances); - } - LOG.debug("currentMapping: {}", currentMapping); LOG.debug("stateCountMap: {}", stateCountMap); - LOG.debug("liveEnabledNodes: {}", liveEnabledNodes); + LOG.debug("assignableLiveEnabledNodes: {}", assignableLiveEnabledNodes); LOG.debug("activeNodes: {}", activeNodes); - LOG.debug("allNodes: {}", allNodesDeduped); + LOG.debug("assignableNodes: {}", assignableNodes); LOG.debug("maxPartition: {}", maxPartition); LOG.debug("newIdealMapping: {}", newIdealMapping); LOG.debug("finalMapping: {}", finalMapping); @@ -274,14 +256,15 @@ public ResourceAssignment computeBestPossiblePartitionState(ResourceControllerDa LOG.debug("Processing resource:" + resource.getResourceName()); } - Set allNodes = cache.getEnabledInstances(); - Set liveNodes = cache.getLiveInstances().keySet(); + Set allNodes = cache.getAssignableEnabledInstances(); + Set liveNodes = cache.getAssignableLiveInstances().keySet(); ClusterConfig clusterConfig = cache.getClusterConfig(); long delayTime = DelayedRebalanceUtil.getRebalanceDelay(idealState, clusterConfig); Set activeNodes = DelayedRebalanceUtil .getActiveNodes(allNodes, idealState, liveNodes, cache.getInstanceOfflineTimeMap(), - cache.getLiveInstances().keySet(), cache.getInstanceConfigMap(), delayTime, + cache.getAssignableLiveInstances().keySet(), cache.getAssignableInstanceConfigMap(), + delayTime, clusterConfig); String stateModelDefName = idealState.getStateModelDefRef(); diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/strategy/AbstractEvenDistributionRebalanceStrategy.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/strategy/AbstractEvenDistributionRebalanceStrategy.java index d4922b9d63..7750bd70b0 100644 --- 
a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/strategy/AbstractEvenDistributionRebalanceStrategy.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/strategy/AbstractEvenDistributionRebalanceStrategy.java @@ -87,7 +87,7 @@ public ZNRecord computePartitionAssignment(final List allNodes, final List liveNodes, final Map> currentMapping, ResourceControllerDataProvider clusterData) { // validate the instance configs - Map instanceConfigMap = clusterData.getInstanceConfigMap(); + Map instanceConfigMap = clusterData.getAssignableInstanceConfigMap(); if (instanceConfigMap == null || !instanceConfigMap.keySet().containsAll(allNodes)) { throw new HelixException(String.format("Config for instances %s is not found!", allNodes.removeAll(instanceConfigMap.keySet()))); @@ -116,7 +116,8 @@ private ZNRecord computeBestPartitionAssignment(List allNodes, List> finalPartitionMap = null; Topology allNodeTopo = - new Topology(allNodes, allNodes, clusterData.getInstanceConfigMap(), clusterData.getClusterConfig()); + new Topology(allNodes, allNodes, clusterData.getAssignableInstanceConfigMap(), + clusterData.getClusterConfig()); // Transform current assignment to instance->partitions map, and get total partitions Map> nodeToPartitionMap = convertPartitionMap(origPartitionMap, allNodeTopo); diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/strategy/ConstraintRebalanceStrategy.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/strategy/ConstraintRebalanceStrategy.java index b52ef98526..783af13f08 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/strategy/ConstraintRebalanceStrategy.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/strategy/ConstraintRebalanceStrategy.java @@ -154,7 +154,7 @@ public ZNRecord computePartitionAssignment(final List allNodes, // Since instance weight will be replaced by constraint evaluation, record it in advance to avoid 
// overwriting. Map instanceWeightRecords = new HashMap<>(); - for (InstanceConfig instanceConfig : clusterData.getInstanceConfigMap().values()) { + for (InstanceConfig instanceConfig : clusterData.getAssignableInstanceConfigMap().values()) { if (instanceConfig.getWeight() != InstanceConfig.WEIGHT_NOT_SET) { instanceWeightRecords.put(instanceConfig.getInstanceName(), instanceConfig.getWeight()); } @@ -163,7 +163,7 @@ public ZNRecord computePartitionAssignment(final List allNodes, List candidates = new ArrayList<>(allNodes); // Only calculate for configured nodes. // Remove all non-configured nodes. - candidates.retainAll(clusterData.getAllInstances()); + candidates.retainAll(clusterData.getAssignableInstances()); // For generating the IdealState ZNRecord Map> preferenceList = new HashMap<>(); @@ -207,7 +207,7 @@ public ZNRecord computePartitionAssignment(final List allNodes, // recover the original weight for (String instanceName : instanceWeightRecords.keySet()) { - clusterData.getInstanceConfigMap().get(instanceName) + clusterData.getAssignableInstanceConfigMap().get(instanceName) .setWeight(instanceWeightRecords.get(instanceName)); } @@ -297,7 +297,7 @@ private List computeSinglePartitionAssignment(String partitionName, } // Limit the weight to be at least MIN_INSTANCE_WEIGHT for (int i = 0; i < instancePriority.length; i++) { - clusterData.getInstanceConfigMap().get(qualifiedNodes.get(i)) + clusterData.getAssignableInstanceConfigMap().get(qualifiedNodes.get(i)) .setWeight(instancePriority[i] - baseline + MIN_INSTANCE_WEIGHT); } diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/strategy/CrushRebalanceStrategy.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/strategy/CrushRebalanceStrategy.java index 38d47abbe4..08bbaaffa0 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/strategy/CrushRebalanceStrategy.java +++ 
b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/strategy/CrushRebalanceStrategy.java @@ -75,7 +75,7 @@ public void init(String resourceName, final List partitions, public ZNRecord computePartitionAssignment(final List allNodes, final List liveNodes, final Map> currentMapping, ResourceControllerDataProvider clusterData) throws HelixException { - Map instanceConfigMap = clusterData.getInstanceConfigMap(); + Map instanceConfigMap = clusterData.getAssignableInstanceConfigMap(); _clusterTopo = new Topology(allNodes, liveNodes, instanceConfigMap, clusterData.getClusterConfig()); Node topNode = _clusterTopo.getRootNode(); diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/strategy/MultiRoundCrushRebalanceStrategy.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/strategy/MultiRoundCrushRebalanceStrategy.java index 0a7f4b67b9..a53257f3dd 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/strategy/MultiRoundCrushRebalanceStrategy.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/strategy/MultiRoundCrushRebalanceStrategy.java @@ -82,7 +82,7 @@ public void init(String resourceName, final List partitions, public ZNRecord computePartitionAssignment(final List allNodes, final List liveNodes, final Map> currentMapping, ResourceControllerDataProvider clusterData) throws HelixException { - Map instanceConfigMap = clusterData.getInstanceConfigMap(); + Map instanceConfigMap = clusterData.getAssignableInstanceConfigMap(); _clusterTopo = new Topology(allNodes, liveNodes, instanceConfigMap, clusterData.getClusterConfig()); Node root = _clusterTopo.getRootNode(); diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/util/DelayedRebalanceUtil.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/util/DelayedRebalanceUtil.java index c7066d053d..f4fb26541e 100644 --- 
a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/util/DelayedRebalanceUtil.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/util/DelayedRebalanceUtil.java @@ -141,92 +141,6 @@ public static Set filterOutEvacuatingInstances(Map filterOutInstancesWithDuplicateLogicalIds( - ClusterTopologyConfig clusterTopologyConfig, Map instanceConfigMap, - Set instances) { - Set filteredNodes = new HashSet<>(); - Map filteredInstancesByLogicalId = new HashMap<>(); - - instances.forEach(node -> { - InstanceConfig thisInstanceConfig = instanceConfigMap.get(node); - if (thisInstanceConfig == null) { - return; - } - String thisLogicalId = - thisInstanceConfig.getLogicalId(clusterTopologyConfig.getEndNodeType()); - - if (filteredInstancesByLogicalId.containsKey(thisLogicalId)) { - InstanceConfig filteredDuplicateInstanceConfig = - instanceConfigMap.get(filteredInstancesByLogicalId.get(thisLogicalId)); - if ((filteredDuplicateInstanceConfig.getInstanceOperation() - .equals(InstanceConstants.InstanceOperation.SWAP_IN.name()) - && thisInstanceConfig.getInstanceOperation() - .equals(InstanceConstants.InstanceOperation.SWAP_OUT.name())) - || thisInstanceConfig.getInstanceOperation().isEmpty()) { - // If the already filtered instance is SWAP_IN and this instance is in SWAP_OUT, then replace the filtered - // instance with this instance. If this instance has no InstanceOperation, then replace the filtered instance - // with this instance. This is the case where the SWAP_IN node has been marked as complete or SWAP_IN exists and - // SWAP_OUT does not. There can never be a case where both have no InstanceOperation set. 
- filteredNodes.remove(filteredInstancesByLogicalId.get(thisLogicalId)); - filteredNodes.add(node); - filteredInstancesByLogicalId.put(thisLogicalId, node); - } - } else { - filteredNodes.add(node); - filteredInstancesByLogicalId.put(thisLogicalId, node); - } - }); - - return filteredNodes; - } - - /** - * Look through the provided mapping and add corresponding SWAP_IN node if a SWAP_OUT node exists - * in the partition's preference list. - * - * @param mapping the mapping to be updated (IdealState ZNRecord) - * @param swapOutToSwapInInstancePairs the map of SWAP_OUT to SWAP_IN instances - */ - public static void addSwapInInstanceToPreferenceListsIfSwapOutInstanceExists(ZNRecord mapping, - Map swapOutToSwapInInstancePairs, Set enabledLiveSwapInInstances) { - Map> preferenceListsByPartition = mapping.getListFields(); - for (String partition : preferenceListsByPartition.keySet()) { - List preferenceList = preferenceListsByPartition.get(partition); - if (preferenceList == null) { - continue; - } - List newInstancesToAdd = new ArrayList<>(); - for (String instanceName : preferenceList) { - if (swapOutToSwapInInstancePairs.containsKey(instanceName) - && enabledLiveSwapInInstances.contains( - swapOutToSwapInInstancePairs.get(instanceName))) { - String swapInInstanceName = swapOutToSwapInInstancePairs.get(instanceName); - if (!preferenceList.contains(swapInInstanceName) && !newInstancesToAdd.contains( - swapInInstanceName)) { - newInstancesToAdd.add(swapInInstanceName); - } - } - } - if (!newInstancesToAdd.isEmpty()) { - preferenceList.addAll(newInstancesToAdd); - } - } - } - /** * Return the time when an offline or disabled instance should be treated as inactive. Return -1 * if it is inactive now or forced to be rebalanced by an on-demand rebalance. 
@@ -429,8 +343,8 @@ public static Set findToBeAssignedReplicasForMinActiveReplica // keep all current assignment and add to allocated replicas resourceAssignment.getMappedPartitions().forEach(partition -> - resourceAssignment.getReplicaMap(partition).forEach((instance, state) -> - allocatedReplicas.computeIfAbsent(instance, key -> new HashSet<>()) + resourceAssignment.getReplicaMap(partition).forEach((logicalId, state) -> + allocatedReplicas.computeIfAbsent(logicalId, key -> new HashSet<>()) .add(new AssignableReplica(clusterData.getClusterConfig(), mergedResourceConfig, partition.getPartitionName(), state, statePriorityMap.get(state))))); // only proceed for resource requiring delayed rebalance overwrites @@ -505,7 +419,7 @@ private static List findPartitionsMissingMinActiveReplica( ResourceAssignment resourceAssignment) { String resourceName = resourceAssignment.getResourceName(); IdealState currentIdealState = clusterData.getIdealState(resourceName); - Set enabledLiveInstances = clusterData.getEnabledLiveInstances(); + Set enabledLiveInstances = clusterData.getAssignableEnabledLiveInstances(); int numReplica = currentIdealState.getReplicaCount(enabledLiveInstances.size()); int minActiveReplica = DelayedRebalanceUtil.getMinActiveReplica(ResourceConfig .mergeIdealStateWithResourceConfig(clusterData.getResourceConfig(resourceName), @@ -526,7 +440,7 @@ private static List findPartitionsMissingMinActiveReplica( private static int getMinActiveReplica(ResourceControllerDataProvider clusterData, String resourceName) { IdealState currentIdealState = clusterData.getIdealState(resourceName); - Set enabledLiveInstances = clusterData.getEnabledLiveInstances(); + Set enabledLiveInstances = clusterData.getAssignableEnabledLiveInstances(); int numReplica = currentIdealState.getReplicaCount(enabledLiveInstances.size()); return DelayedRebalanceUtil.getMinActiveReplica(ResourceConfig .mergeIdealStateWithResourceConfig(clusterData.getResourceConfig(resourceName), diff --git 
a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/GlobalRebalanceRunner.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/GlobalRebalanceRunner.java index 6c199bc1be..d710425cf1 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/GlobalRebalanceRunner.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/GlobalRebalanceRunner.java @@ -20,6 +20,7 @@ */ import com.google.common.collect.ImmutableSet; + import java.util.Map; import java.util.Set; import java.util.concurrent.ExecutionException; @@ -30,12 +31,12 @@ import org.apache.helix.HelixRebalanceException; import org.apache.helix.controller.changedetector.ResourceChangeDetector; import org.apache.helix.controller.dataproviders.ResourceControllerDataProvider; -import org.apache.helix.controller.rebalancer.util.DelayedRebalanceUtil; import org.apache.helix.controller.rebalancer.util.WagedRebalanceUtil; import org.apache.helix.controller.rebalancer.waged.model.ClusterModel; import org.apache.helix.controller.rebalancer.waged.model.ClusterModelProvider; import org.apache.helix.controller.stages.CurrentStateOutput; import org.apache.helix.model.ClusterTopologyConfig; +import org.apache.helix.model.Partition; import org.apache.helix.model.Resource; import org.apache.helix.model.ResourceAssignment; import org.apache.helix.monitoring.metrics.MetricCollector; @@ -111,6 +112,7 @@ public void globalRebalance(ResourceControllerDataProvider clusterData, Map> clusterChanges = _changeDetector.getAllChanges(); + Set allAssignableInstances = clusterData.getAssignableInstances(); if (clusterChanges.keySet().stream().anyMatch(GLOBAL_REBALANCE_REQUIRED_CHANGE_TYPES::contains)) { final boolean waitForGlobalRebalance = !_asyncGlobalRebalanceEnabled; @@ -120,8 +122,8 @@ public void globalRebalance(ResourceControllerDataProvider clusterData, Map resourceMap, + private void doGlobalRebalance(ResourceControllerDataProvider 
clusterData, + Map resourceMap, Set allAssignableInstances, RebalanceAlgorithm algorithm, CurrentStateOutput currentStateOutput, boolean shouldTriggerMainPipeline, - Map> clusterChanges) throws HelixRebalanceException { + Map> clusterChanges) + throws HelixRebalanceException { LOG.info("Start calculating the new baseline."); _baselineCalcCounter.increment(1L); _baselineCalcLatency.startMeasuringLatency(); @@ -166,14 +170,7 @@ private void doGlobalRebalance(ResourceControllerDataProvider clusterData, Map computeBestPossibleStates( final CurrentStateOutput currentStateOutput, RebalanceAlgorithm algorithm) throws HelixRebalanceException { - Set allNodesDeduped = DelayedRebalanceUtil.filterOutInstancesWithDuplicateLogicalIds( - ClusterTopologyConfig.createFromClusterConfig(clusterData.getClusterConfig()), - clusterData.getInstanceConfigMap(), clusterData.getAllInstances()); - // Remove the non-selected instances with duplicate logicalIds from liveEnabledNodes - // This ensures the same duplicate instance is kept in both allNodesDeduped and liveEnabledNodes - Set liveEnabledNodesDeduped = clusterData.getEnabledLiveInstances(); - liveEnabledNodesDeduped.retainAll(allNodesDeduped); - Set activeNodes = - DelayedRebalanceUtil.getActiveNodes(allNodesDeduped, liveEnabledNodesDeduped, - clusterData.getInstanceOfflineTimeMap(), clusterData.getLiveInstances().keySet(), - clusterData.getInstanceConfigMap(), clusterData.getClusterConfig()); + DelayedRebalanceUtil.getActiveNodes(clusterData.getAssignableInstances(), + clusterData.getAssignableEnabledLiveInstances(), + clusterData.getInstanceOfflineTimeMap(), + clusterData.getAssignableLiveInstances().keySet(), + clusterData.getAssignableInstanceConfigMap(), clusterData.getClusterConfig()); // Schedule (or unschedule) delayed rebalance according to the delayed rebalance config. 
delayedRebalanceSchedule(clusterData, activeNodes, resourceMap.keySet()); @@ -369,19 +362,6 @@ private Map convertResourceAssignment( newIdealState.setPreferenceLists( getPreferenceLists(assignments.get(resourceName), statePriorityMap)); - // 1. Get all SWAP_OUT instances and corresponding SWAP_IN instance pairs in the cluster. - Map swapOutToSwapInInstancePairs = - clusterData.getSwapOutToSwapInInstancePairs(); - // 2. Get all enabled and live SWAP_IN instances in the cluster. - Set enabledLiveSwapInInstances = clusterData.getEnabledLiveSwapInInstanceNames(); - // 3. For each SWAP_OUT instance in any of the preferenceLists, add the corresponding SWAP_IN instance to the end. - // Skipping this when there are not SWAP_IN instances ready(enabled and live) will reduce computation time when there is not an active - // swap occurring. - if (!clusterData.getEnabledLiveSwapInInstanceNames().isEmpty()) { - DelayedRebalanceUtil.addSwapInInstanceToPreferenceListsIfSwapOutInstanceExists( - newIdealState.getRecord(), swapOutToSwapInInstancePairs, enabledLiveSwapInInstances); - } - // Note the state mapping in the new assignment won't directly propagate to the map fields. // The rebalancer will calculate for the final state mapping considering the current states. finalIdealStateMap.put(resourceName, newIdealState); @@ -419,15 +399,12 @@ private Map handleDelayedRebalanceMinActiveReplica( Set activeNodes, Map currentResourceAssignment, RebalanceAlgorithm algorithm) throws HelixRebalanceException { + // the "real" live nodes at the time - // TODO: this is a hacky way to filter our on operation instance. We should consider redesign `getEnabledLiveInstances()`. 
- final Set allNodesDeduped = DelayedRebalanceUtil.filterOutInstancesWithDuplicateLogicalIds( - ClusterTopologyConfig.createFromClusterConfig(clusterData.getClusterConfig()), - clusterData.getInstanceConfigMap(), clusterData.getAllInstances()); - final Set enabledLiveInstances = clusterData.getEnabledLiveInstances(); - // Remove the non-selected instances with duplicate logicalIds from liveEnabledNodes - // This ensures the same duplicate instance is kept in both allNodesDeduped and liveEnabledNodes - enabledLiveInstances.retainAll(allNodesDeduped); + // TODO: Move evacuation into BaseControllerDataProvider assignableNode logic. + final Set enabledLiveInstances = DelayedRebalanceUtil.filterOutEvacuatingInstances( + clusterData.getAssignableInstanceConfigMap(), + clusterData.getAssignableEnabledLiveInstances()); if (activeNodes.equals(enabledLiveInstances) || !requireRebalanceOverwrite(clusterData, currentResourceAssignment)) { // no need for additional process, return the current resource assignment @@ -622,12 +599,13 @@ private void delayedRebalanceSchedule(ResourceControllerDataProvider clusterData ClusterConfig clusterConfig = clusterData.getClusterConfig(); boolean delayedRebalanceEnabled = DelayedRebalanceUtil.isDelayRebalanceEnabled(clusterConfig); Set offlineOrDisabledInstances = new HashSet<>(delayedActiveNodes); - offlineOrDisabledInstances.removeAll(clusterData.getEnabledLiveInstances()); + offlineOrDisabledInstances.removeAll(clusterData.getAssignableEnabledLiveInstances()); for (String resource : resourceSet) { DelayedRebalanceUtil .setRebalanceScheduler(resource, delayedRebalanceEnabled, offlineOrDisabledInstances, - clusterData.getInstanceOfflineTimeMap(), clusterData.getLiveInstances().keySet(), - clusterData.getInstanceConfigMap(), clusterConfig.getRebalanceDelayTime(), + clusterData.getInstanceOfflineTimeMap(), + clusterData.getAssignableLiveInstances().keySet(), + clusterData.getAssignableInstanceConfigMap(), clusterConfig.getRebalanceDelayTime(), 
clusterConfig, _manager); } } else { @@ -642,13 +620,10 @@ protected boolean requireRebalanceOverwrite(ResourceControllerDataProvider clust String resourceName = resourceAssignment.getResourceName(); IdealState currentIdealState = clusterData.getIdealState(resourceName); - Set allNodesDeduped = DelayedRebalanceUtil.filterOutInstancesWithDuplicateLogicalIds( - ClusterTopologyConfig.createFromClusterConfig(clusterData.getClusterConfig()), - clusterData.getInstanceConfigMap(), clusterData.getAllInstances()); - Set enabledLiveInstances = clusterData.getEnabledLiveInstances(); - // Remove the non-selected instances with duplicate logicalIds from liveEnabledNodes - // This ensures the same duplicate instance is kept in both allNodesDeduped and liveEnabledNodes - enabledLiveInstances.retainAll(allNodesDeduped); + // TODO: Move evacuation into BaseControllerDataProvider assignableNode logic. + Set enabledLiveInstances = DelayedRebalanceUtil.filterOutEvacuatingInstances( + clusterData.getAssignableInstanceConfigMap(), + clusterData.getAssignableEnabledLiveInstances()); int numReplica = currentIdealState.getReplicaCount(enabledLiveInstances.size()); int minActiveReplica = DelayedRebalanceUtil.getMinActiveReplica(ResourceConfig diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/AbstractPartitionMovementConstraint.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/AbstractPartitionMovementConstraint.java index 913e042340..0fbd0ad3d8 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/AbstractPartitionMovementConstraint.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/AbstractPartitionMovementConstraint.java @@ -64,16 +64,16 @@ protected Map getStateMap(AssignableReplica replica, return assignment.get(resourceName).getReplicaMap(new Partition(partitionName)); } - protected double calculateAssignmentScore(String 
nodeName, String state, + protected double calculateAssignmentScore(String logicalId, String state, Map instanceToStateMap) { - if (instanceToStateMap.containsKey(nodeName)) { + if (instanceToStateMap.containsKey(logicalId)) { // The score when the proposed allocation partially matches the assignment plan but will // require a state transition. double scoreWithStateTransitionCost = MIN_SCORE + (MAX_SCORE - MIN_SCORE) * STATE_TRANSITION_COST_FACTOR; // if state matches, no state transition required for the proposed assignment; if state does // not match, then the proposed assignment requires state transition. - return state.equals(instanceToStateMap.get(nodeName)) ? MAX_SCORE + return state.equals(instanceToStateMap.get(logicalId)) ? MAX_SCORE : scoreWithStateTransitionCost; } return MIN_SCORE; diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/BaselineInfluenceConstraint.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/BaselineInfluenceConstraint.java index 5e3fcd2868..8063de34fc 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/BaselineInfluenceConstraint.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/BaselineInfluenceConstraint.java @@ -44,7 +44,7 @@ protected double getAssignmentScore(AssignableNode node, AssignableReplica repli Map baselineAssignment = getStateMap(replica, clusterContext.getBaselineAssignment()); - return calculateAssignmentScore(node.getInstanceName(), replica.getReplicaState(), + return calculateAssignmentScore(node.getLogicalId(), replica.getReplicaState(), baselineAssignment); } } diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ConstraintBasedAlgorithm.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ConstraintBasedAlgorithm.java index 77d56302c1..60d43764a4 100644 --- 
a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ConstraintBasedAlgorithm.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/ConstraintBasedAlgorithm.java @@ -148,10 +148,10 @@ private Optional getNodeWithHighestPoints(AssignableReplica repl if (scoreCompareResult == 0) { // If the evaluation scores of 2 nodes are the same, the algorithm assigns the replica // to the idle node first. - String instanceName1 = nodeEntry1.getKey().getInstanceName(); - String instanceName2 = nodeEntry2.getKey().getInstanceName(); - int idleScore1 = busyInstances.contains(instanceName1) ? 0 : 1; - int idleScore2 = busyInstances.contains(instanceName2) ? 0 : 1; + String logicalId1 = nodeEntry1.getKey().getLogicalId(); + String logicalId2 = nodeEntry2.getKey().getLogicalId(); + int idleScore1 = busyInstances.contains(logicalId1) ? 0 : 1; + int idleScore2 = busyInstances.contains(logicalId2) ? 0 : 1; return idleScore1 != idleScore2 ? (idleScore1 - idleScore2) : -nodeEntry1.getKey().compareTo(nodeEntry2.getKey()); } else { @@ -271,7 +271,7 @@ public int compareTo(AssignableReplicaWithScore replica2) { /** * @param assignments A collection of resource replicas assignment. - * @return A set of instance names that have at least one replica assigned in the input assignments. + * @return A set of logicalIds that have at least one replica assigned in the input assignments. 
*/ private Set getBusyInstances(Collection assignments) { return assignments.stream().flatMap( diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/PartitionMovementConstraint.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/PartitionMovementConstraint.java index 08c135d663..05c7a94ede 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/PartitionMovementConstraint.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/PartitionMovementConstraint.java @@ -40,14 +40,14 @@ protected double getAssignmentScore(AssignableNode node, AssignableReplica repli getStateMap(replica, clusterContext.getBestPossibleAssignment()); Map baselineAssignment = getStateMap(replica, clusterContext.getBaselineAssignment()); - String nodeName = node.getInstanceName(); + String logicalId = node.getLogicalId(); String state = replica.getReplicaState(); if (bestPossibleAssignment.isEmpty()) { // if best possible is missing, it means the replica belongs to a newly added resource, so // baseline assignment should be used instead. 
- return calculateAssignmentScore(nodeName, state, baselineAssignment); + return calculateAssignmentScore(logicalId, state, baselineAssignment); } - return calculateAssignmentScore(nodeName, state, bestPossibleAssignment); + return calculateAssignmentScore(logicalId, state, bestPossibleAssignment); } } diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/model/ClusterModelProvider.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/model/ClusterModelProvider.java index 3f16732107..a869a904ef 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/model/ClusterModelProvider.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/model/ClusterModelProvider.java @@ -80,7 +80,8 @@ public static ClusterModel generateClusterModelForDelayedRebalanceOverwrites( Set activeInstances, Map resourceAssignment) { return generateClusterModel(dataProvider, resourceMap, activeInstances, Collections.emptyMap(), - Collections.emptyMap(), resourceAssignment, RebalanceScopeType.DELAYED_REBALANCE_OVERWRITES); + Collections.emptyMap(), resourceAssignment, + RebalanceScopeType.DELAYED_REBALANCE_OVERWRITES); } /** @@ -162,8 +163,9 @@ public static ClusterModel generateClusterModelForBaseline( public static ClusterModel generateClusterModelFromExistingAssignment( ResourceControllerDataProvider dataProvider, Map resourceMap, Map currentStateAssignment) { - return generateClusterModel(dataProvider, resourceMap, dataProvider.getEnabledLiveInstances(), - Collections.emptyMap(), Collections.emptyMap(), currentStateAssignment, + return generateClusterModel(dataProvider, resourceMap, + dataProvider.getAssignableEnabledLiveInstances(), Collections.emptyMap(), + Collections.emptyMap(), currentStateAssignment, RebalanceScopeType.GLOBAL_BASELINE); } @@ -187,11 +189,35 @@ private static ClusterModel generateClusterModel(ResourceControllerDataProvider Map> clusterChanges, Map idealAssignment, Map 
currentAssignment, RebalanceScopeType scopeType) { + Map assignableInstanceConfigMap = dataProvider.getAssignableInstanceConfigMap(); // Construct all the assignable nodes and initialize with the allocated replicas. Set assignableNodes = - getAllAssignableNodes(dataProvider.getClusterConfig(), dataProvider.getInstanceConfigMap(), + getAllAssignableNodes(dataProvider.getClusterConfig(), assignableInstanceConfigMap, activeInstances); + // Generate the logical view of the ideal assignment and the current assignment. + ClusterTopologyConfig clusterTopologyConfig = + ClusterTopologyConfig.createFromClusterConfig(dataProvider.getClusterConfig()); + Map logicalIdIdealAssignment = + idealAssignment.isEmpty() ? idealAssignment + : generateResourceAssignmentMapLogicalIdView(idealAssignment, clusterTopologyConfig, + dataProvider); + Map logicalIdCurrentAssignment = + currentAssignment.isEmpty() ? currentAssignment + : generateResourceAssignmentMapLogicalIdView(currentAssignment, clusterTopologyConfig, + dataProvider); + + // Get the set of active logical ids. + Set activeLogicalIds = activeInstances.stream().map( + instanceName -> assignableInstanceConfigMap.get(instanceName) + .getLogicalId(clusterTopologyConfig.getEndNodeType())).collect(Collectors.toSet()); + + Set assignableLiveInstanceNames = dataProvider.getAssignableLiveInstances().keySet(); + Set assignableLiveInstanceLogicalIds = + assignableLiveInstanceNames.stream().map( + instanceName -> assignableInstanceConfigMap.get(instanceName) + .getLogicalId(clusterTopologyConfig.getEndNodeType())).collect(Collectors.toSet()); + // Generate replica objects for all the resource partitions. 
// Map> replicaMap = @@ -203,27 +229,28 @@ private static ClusterModel generateClusterModel(ResourceControllerDataProvider Set toBeAssignedReplicas; switch (scopeType) { case GLOBAL_BASELINE: - toBeAssignedReplicas = findToBeAssignedReplicasByClusterChanges(replicaMap, activeInstances, - dataProvider.getLiveInstances().keySet(), clusterChanges, currentAssignment, + toBeAssignedReplicas = + findToBeAssignedReplicasByClusterChanges(replicaMap, activeLogicalIds, + assignableLiveInstanceLogicalIds, clusterChanges, logicalIdCurrentAssignment, allocatedReplicas); break; case PARTIAL: // Filter to remove the replicas that do not exist in the ideal assignment given but exist // in the replicaMap. This is because such replicas are new additions that do not need to be // rebalanced right away. - retainExistingReplicas(replicaMap, idealAssignment); + retainExistingReplicas(replicaMap, logicalIdIdealAssignment); toBeAssignedReplicas = - findToBeAssignedReplicasByComparingWithIdealAssignment(replicaMap, activeInstances, - idealAssignment, currentAssignment, allocatedReplicas); + findToBeAssignedReplicasByComparingWithIdealAssignment(replicaMap, activeLogicalIds, + logicalIdIdealAssignment, logicalIdCurrentAssignment, allocatedReplicas); break; case EMERGENCY: - toBeAssignedReplicas = findToBeAssignedReplicasOnDownInstances(replicaMap, activeInstances, - currentAssignment, allocatedReplicas); + toBeAssignedReplicas = findToBeAssignedReplicasOnDownInstances(replicaMap, activeLogicalIds, + logicalIdCurrentAssignment, allocatedReplicas); break; case DELAYED_REBALANCE_OVERWRITES: toBeAssignedReplicas = DelayedRebalanceUtil.findToBeAssignedReplicasForMinActiveReplica(dataProvider, replicaMap.keySet(), - activeInstances, currentAssignment, allocatedReplicas); + activeLogicalIds, logicalIdCurrentAssignment, allocatedReplicas); break; default: throw new HelixException("Unknown rebalance scope type: " + scopeType); @@ -231,18 +258,56 @@ private static ClusterModel 
generateClusterModel(ResourceControllerDataProvider // Update the allocated replicas to the assignable nodes. assignableNodes.parallelStream().forEach(node -> node.assignInitBatch( - allocatedReplicas.getOrDefault(node.getInstanceName(), Collections.emptySet()))); + allocatedReplicas.getOrDefault(node.getLogicalId(), Collections.emptySet()))); // Construct and initialize cluster context. ClusterContext context = new ClusterContext( replicaMap.values().stream().flatMap(Set::stream).collect(Collectors.toSet()), - assignableNodes, idealAssignment, currentAssignment); + assignableNodes, logicalIdIdealAssignment, logicalIdCurrentAssignment); + // Initial the cluster context with the allocated assignments. context.setAssignmentForFaultZoneMap(mapAssignmentToFaultZone(assignableNodes)); return new ClusterModel(context, toBeAssignedReplicas, assignableNodes); } + private static Map generateResourceAssignmentMapLogicalIdView( + Map resourceAssignmentMap, + ClusterTopologyConfig clusterTopologyConfig, ResourceControllerDataProvider dataProvider) { + + Map allInstanceConfigMap = dataProvider.getInstanceConfigMap(); + + return resourceAssignmentMap.entrySet().parallelStream() + .collect(Collectors.toMap(Map.Entry::getKey, entry -> { + String resourceName = entry.getKey(); + ResourceAssignment instanceNameResourceAssignment = entry.getValue(); + ResourceAssignment logicalIdResourceAssignment = new ResourceAssignment(resourceName); + + StateModelDefinition stateModelDefinition = dataProvider.getStateModelDef( + dataProvider.getIdealState(resourceName).getStateModelDefRef()); + + instanceNameResourceAssignment.getMappedPartitions().forEach(partition -> { + Map logicalIdStateMap = new HashMap<>(); + + instanceNameResourceAssignment.getReplicaMap(partition) + .forEach((instanceName, state) -> { + if (allInstanceConfigMap.containsKey(instanceName)) { + String logicalId = allInstanceConfigMap.get(instanceName) + .getLogicalId(clusterTopologyConfig.getEndNodeType()); + if 
(!logicalIdStateMap.containsKey(logicalId) || state.equals( + stateModelDefinition.getTopState())) { + logicalIdStateMap.put(logicalId, state); + } + } + }); + + logicalIdResourceAssignment.addReplicaMap(partition, logicalIdStateMap); + }); + + return logicalIdResourceAssignment; + })); + } + // Filter the replicas map so only the replicas that have been allocated in the existing // assignmentMap remain in the map. private static void retainExistingReplicas(Map> replicaMap, @@ -399,8 +464,11 @@ private static Set findToBeAssignedReplicasByClusterChanges( Set newlyConnectedNodes = clusterChanges .getOrDefault(HelixConstants.ChangeType.LIVE_INSTANCE, Collections.emptySet()); newlyConnectedNodes.retainAll(liveInstances); - if (clusterChanges.containsKey(HelixConstants.ChangeType.CLUSTER_CONFIG) || clusterChanges - .containsKey(HelixConstants.ChangeType.INSTANCE_CONFIG) || !newlyConnectedNodes.isEmpty()) { + + if (clusterChanges.containsKey(HelixConstants.ChangeType.CLUSTER_CONFIG) + || clusterChanges.containsKey(HelixConstants.ChangeType.INSTANCE_CONFIG) + || !newlyConnectedNodes.isEmpty()) { + // 1. If the cluster topology has been modified, need to reassign all replicas. // 2. If any node was newly connected, need to rebalance all replicas for the evenness of // distribution. @@ -419,7 +487,7 @@ private static Set findToBeAssignedReplicasByClusterChanges( .getOrDefault(HelixConstants.ChangeType.IDEAL_STATE, Collections.emptySet()) .contains(resourceName) || !currentAssignment.containsKey(resourceName)) { toBeAssignedReplicas.addAll(replicas); - continue; // go to check next resource + // go to check next resource } else { // check for every replica assignment to identify if the related replicas need to be reassigned. // > @@ -433,16 +501,15 @@ private static Set findToBeAssignedReplicasByClusterChanges( if (validInstances.isEmpty()) { // 3. 
if no such an instance in the current assignment, need to reassign the replica toBeAssignedReplicas.add(replica); - continue; // go to check the next replica } else { Iterator iter = validInstances.iterator(); // Remove the instance from the current allocation record after processing so that it // won't be double-processed as we loop through all replicas - String instanceName = iter.next(); + String logicalId = iter.next(); iter.remove(); // the current assignment for this replica is valid, // add to the allocated replica list. - allocatedReplicas.computeIfAbsent(instanceName, key -> new HashSet<>()).add(replica); + allocatedReplicas.computeIfAbsent(logicalId, key -> new HashSet<>()).add(replica); } } } diff --git a/helix-core/src/main/java/org/apache/helix/controller/stages/BestPossibleStateCalcStage.java b/helix-core/src/main/java/org/apache/helix/controller/stages/BestPossibleStateCalcStage.java index 8223d9a36f..8ec4b44757 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/stages/BestPossibleStateCalcStage.java +++ b/helix-core/src/main/java/org/apache/helix/controller/stages/BestPossibleStateCalcStage.java @@ -22,9 +22,11 @@ import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; +import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.concurrent.Callable; import java.util.stream.Collectors; @@ -35,6 +37,7 @@ import org.apache.helix.controller.dataproviders.ResourceControllerDataProvider; import org.apache.helix.controller.pipeline.AbstractBaseStage; import org.apache.helix.controller.pipeline.StageException; +import org.apache.helix.controller.rebalancer.AbstractRebalancer; import org.apache.helix.controller.rebalancer.CustomRebalancer; import org.apache.helix.controller.rebalancer.DelayedAutoRebalancer; import org.apache.helix.controller.rebalancer.MaintenanceRebalancer; @@ -86,9 +89,16 @@ public void process(ClusterEvent event) throws 
Exception { final BestPossibleStateOutput bestPossibleStateOutput = compute(event, resourceMap, currentStateOutput); + + // Add swap-in instances to bestPossibleStateOutput. + // We do this after computing the best possible state output because rebalance algorithms should not + // to be aware of swap-in instances. We simply add the swap-in instances to the + // stateMap where the swap-out instance is and compute the correct state. + addSwapInInstancesToBestPossibleState(resourceMap, bestPossibleStateOutput, cache); + event.addAttribute(AttributeName.BEST_POSSIBLE_STATE.name(), bestPossibleStateOutput); - final Map instanceConfigMap = cache.getInstanceConfigMap(); + final Map allInstanceConfigMap = cache.getInstanceConfigMap(); final Map stateModelDefMap = cache.getStateModelDefMap(); final Map idealStateMap = cache.getIdealStates(); final Map externalViewMap = cache.getExternalViews(); @@ -96,8 +106,8 @@ public void process(ClusterEvent event) throws Exception { asyncExecute(cache.getAsyncTasksThreadPool(), () -> { try { if (clusterStatusMonitor != null) { - clusterStatusMonitor - .setPerInstanceResourceStatus(bestPossibleStateOutput, instanceConfigMap, resourceMap, + clusterStatusMonitor.setPerInstanceResourceStatus(bestPossibleStateOutput, + allInstanceConfigMap, resourceMap, stateModelDefMap); for (String resourceName : idealStateMap.keySet()) { @@ -121,6 +131,52 @@ public void process(ClusterEvent event) throws Exception { }); } + private void addSwapInInstancesToBestPossibleState(Map resourceMap, + BestPossibleStateOutput bestPossibleStateOutput, ResourceControllerDataProvider cache) { + // 1. Get all SWAP_OUT instances and corresponding SWAP_IN instance pairs in the cluster. + Map swapOutToSwapInInstancePairs = cache.getSwapOutToSwapInInstancePairs(); + // 2. Get all enabled and live SWAP_IN instances in the cluster. + Set enabledLiveSwapInInstances = cache.getEnabledLiveSwapInInstanceNames(); + // 3. 
For each SWAP_OUT instance in any of the preferenceLists, add the corresponding SWAP_IN instance to the end. + // Skipping this when there are not SWAP_IN instances ready(enabled and live) will reduce computation time when there is not an active + // swap occurring. + if (!enabledLiveSwapInInstances.isEmpty() && !cache.isMaintenanceModeEnabled()) { + resourceMap.forEach((resourceName, resource) -> { + StateModelDefinition stateModelDef = cache.getStateModelDef(resource.getStateModelDefRef()); + bestPossibleStateOutput.getResourceStatesMap().get(resourceName).getStateMap() + .forEach((partition, stateMap) -> { + Set commonInstances = new HashSet<>(stateMap.keySet()); + commonInstances.retainAll(swapOutToSwapInInstancePairs.keySet()); + + commonInstances.forEach(swapOutInstance -> { + if (stateMap.get(swapOutInstance).equals(stateModelDef.getTopState())) { + if (AbstractRebalancer.getStateCount(stateModelDef.getTopState(), stateModelDef, + stateMap.size() + 1, stateMap.size() + 1) > stateMap.size()) { + // If the swap-out instance's replica is a topState and the StateModel allows for + // another replica with the topState to be added, set the swap-in instance's replica + // to the topState. + stateMap.put(swapOutToSwapInInstancePairs.get(swapOutInstance), + stateModelDef.getTopState()); + } else { + // If the swap-out instance's replica is a topState and the StateModel does not allow for + // another replica with the topState to be added, set the swap-in instance's replica + // to the secondTopState. + stateMap.put(swapOutToSwapInInstancePairs.get(swapOutInstance), + stateModelDef.getSecondTopStates().iterator().next()); + } + } else if (stateModelDef.getSecondTopStates() + .contains(stateMap.get(swapOutInstance))) { + // If the swap-out instance's replica is a secondTopState, set the swap-in instance's replica + // to the same secondTopState. 
+ stateMap.put(swapOutToSwapInInstancePairs.get(swapOutInstance), + stateMap.get(swapOutInstance)); + } + }); + }); + }); + } + } + private void reportResourceState(ClusterStatusMonitor clusterStatusMonitor, BestPossibleStateOutput bestPossibleStateOutput, String resourceName, IdealState is, ExternalView ev, StateModelDefinition stateModelDef) { @@ -239,7 +295,8 @@ private boolean validateOfflineInstancesLimit(final ResourceControllerDataProvid final HelixManager manager) { int maxOfflineInstancesAllowed = cache.getClusterConfig().getMaxOfflineInstancesAllowed(); if (maxOfflineInstancesAllowed >= 0) { - int offlineCount = cache.getAllInstances().size() - cache.getEnabledLiveInstances().size(); + int offlineCount = + cache.getAssignableInstances().size() - cache.getAssignableEnabledLiveInstances().size(); if (offlineCount > maxOfflineInstancesAllowed) { String errMsg = String.format( "Offline Instances count %d greater than allowed count %d. Put cluster %s into " diff --git a/helix-core/src/main/java/org/apache/helix/controller/stages/IntermediateStateCalcStage.java b/helix-core/src/main/java/org/apache/helix/controller/stages/IntermediateStateCalcStage.java index 477e4f99bf..b3990046c0 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/stages/IntermediateStateCalcStage.java +++ b/helix-core/src/main/java/org/apache/helix/controller/stages/IntermediateStateCalcStage.java @@ -551,6 +551,10 @@ private void loadRebalance(Resource resource, Partition partition, messagesThrottled.add(messageToThrottle.getId()); return; } + // TODO: Currently throttling is applied for messages that are targeting all instances including those not considered as + // assignable. They all share the same configured limits. After discussion, there was agreement that this is the proper + // behavior. 
In addition to this, we should consider adding priority based on whether the instance is assignable and whether + // the message is bringing replica count to configured replicas or above configured replicas. throttleStateTransitionsForReplica(throttleController, resource.getResourceName(), partition, messageToThrottle, messagesThrottled, RebalanceType.LOAD_BALANCE, cache, resourceMessageMap); @@ -656,10 +660,12 @@ private Map getRequiredStates(String resourceName, // Generate a state mapping, state -> required numbers based on the live and enabled instances for this partition // preference list if (preferenceList != null) { - return stateModelDefinition.getStateCountMap((int) preferenceList.stream().filter(i -> resourceControllerDataProvider.getEnabledLiveInstances().contains(i)) + return stateModelDefinition.getStateCountMap((int) preferenceList.stream().filter( + i -> resourceControllerDataProvider.getAssignableEnabledLiveInstances().contains(i)) .count(), requiredNumReplica); // StateModelDefinition's counts } - return stateModelDefinition.getStateCountMap(resourceControllerDataProvider.getEnabledLiveInstances().size(), + return stateModelDefinition.getStateCountMap( + resourceControllerDataProvider.getAssignableEnabledLiveInstances().size(), requiredNumReplica); // StateModelDefinition's counts } diff --git a/helix-core/src/main/java/org/apache/helix/controller/stages/MaintenanceRecoveryStage.java b/helix-core/src/main/java/org/apache/helix/controller/stages/MaintenanceRecoveryStage.java index e4a7e12aa5..d262d14023 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/stages/MaintenanceRecoveryStage.java +++ b/helix-core/src/main/java/org/apache/helix/controller/stages/MaintenanceRecoveryStage.java @@ -90,7 +90,7 @@ public void execute(final ClusterEvent event) throws Exception { } // Get the count of all instances that are either offline or disabled int offlineDisabledCount = - cache.getAllInstances().size() - 
cache.getEnabledLiveInstances().size(); + cache.getAssignableInstances().size() - cache.getAssignableEnabledLiveInstances().size(); shouldExitMaintenance = offlineDisabledCount <= numOfflineInstancesForAutoExit; reason = String.format( "Auto-exiting maintenance mode for cluster %s; Num. of offline/disabled instances is %d, less than or equal to the exit threshold %d", diff --git a/helix-core/src/main/java/org/apache/helix/controller/stages/MessageGenerationPhase.java b/helix-core/src/main/java/org/apache/helix/controller/stages/MessageGenerationPhase.java index 8a7ae52b50..5c22c11dba 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/stages/MessageGenerationPhase.java +++ b/helix-core/src/main/java/org/apache/helix/controller/stages/MessageGenerationPhase.java @@ -266,7 +266,8 @@ private void generateMessage(final Resource resource, final BaseControllerDataPr LogUtil.logError(logger, _eventId, String.format( "An invalid message was generated! Discarding this message. sessionIdMap: %s, CurrentStateMap: %s, InstanceStateMap: %s, AllInstances: %s, LiveInstances: %s, Message: %s", sessionIdMap, currentStateOutput.getCurrentStateMap(resourceName, partition), - instanceStateMap, cache.getAllInstances(), cache.getLiveInstances().keySet(), + instanceStateMap, cache.getAllInstances(), + cache.getLiveInstances().keySet(), message)); continue; // Do not add this message } diff --git a/helix-core/src/main/java/org/apache/helix/controller/stages/ReadClusterDataStage.java b/helix-core/src/main/java/org/apache/helix/controller/stages/ReadClusterDataStage.java index a7e9742e06..7e8bde9d1b 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/stages/ReadClusterDataStage.java +++ b/helix-core/src/main/java/org/apache/helix/controller/stages/ReadClusterDataStage.java @@ -19,8 +19,6 @@ * under the License. 
*/ -import java.util.Collection; -import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; diff --git a/helix-core/src/main/java/org/apache/helix/controller/stages/ResourceComputationStage.java b/helix-core/src/main/java/org/apache/helix/controller/stages/ResourceComputationStage.java index 1f77fa66a9..00b2fd71b1 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/stages/ResourceComputationStage.java +++ b/helix-core/src/main/java/org/apache/helix/controller/stages/ResourceComputationStage.java @@ -19,12 +19,10 @@ * under the License. */ -import java.util.HashMap; import java.util.LinkedHashMap; import java.util.Map; import java.util.Set; -import org.apache.helix.HelixProperty; import org.apache.helix.controller.LogUtil; import org.apache.helix.controller.dataproviders.BaseControllerDataProvider; import org.apache.helix.controller.dataproviders.WorkflowControllerDataProvider; diff --git a/helix-core/src/main/java/org/apache/helix/controller/stages/task/TaskSchedulingStage.java b/helix-core/src/main/java/org/apache/helix/controller/stages/task/TaskSchedulingStage.java index dbedf7bcc7..f9662101c7 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/stages/task/TaskSchedulingStage.java +++ b/helix-core/src/main/java/org/apache/helix/controller/stages/task/TaskSchedulingStage.java @@ -40,7 +40,6 @@ import org.apache.helix.controller.stages.CurrentStateOutput; import org.apache.helix.model.Partition; import org.apache.helix.model.Resource; -import org.apache.helix.model.ResourceAssignment; import org.apache.helix.monitoring.mbeans.ClusterStatusMonitor; import org.apache.helix.task.AssignableInstanceManager; import org.apache.helix.task.TaskConstants; diff --git a/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java b/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java index 34bd564878..7a0fe6377e 100644 --- 
a/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java +++ b/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java @@ -643,16 +643,22 @@ private boolean canCompleteSwap(String clusterName, String swapOutInstanceName, HelixDataAccessor accessor = new ZKHelixDataAccessor(clusterName, baseAccessor); PropertyKey.Builder keyBuilder = accessor.keyBuilder(); - // 1. Check that both instances are alive. + // 1. Check that both instances are alive and enabled. LiveInstance swapOutLiveInstance = accessor.getProperty(keyBuilder.liveInstance(swapOutInstanceName)); LiveInstance swapInLiveInstance = accessor.getProperty(keyBuilder.liveInstance(swapInInstanceName)); - if (swapOutLiveInstance == null || swapInLiveInstance == null) { + InstanceConfig swapOutInstanceConfig = getInstanceConfig(clusterName, swapOutInstanceName); + InstanceConfig swapInInstanceConfig = getInstanceConfig(clusterName, swapInInstanceName); + if (swapOutLiveInstance == null || swapInLiveInstance == null + || !swapOutInstanceConfig.getInstanceEnabled() + || !swapInInstanceConfig.getInstanceEnabled()) { logger.warn( - "SwapOutInstance {} is {} and SwapInInstance {} is {} for cluster {}. Swap will not complete unless both instances are ONLINE.", + "SwapOutInstance {} is {} + {} and SwapInInstance {} is {} + {} for cluster {}. Swap will not complete unless both instances are ONLINE.", swapOutInstanceName, swapOutLiveInstance != null ? "ONLINE" : "OFFLINE", - swapInInstanceName, swapInLiveInstance != null ? "ONLINE" : "OFFLINE", clusterName); + swapOutInstanceConfig.getInstanceEnabled() ? "ENABLED" : "DISABLED", swapInInstanceName, + swapInLiveInstance != null ? "ONLINE" : "OFFLINE", + swapInInstanceConfig.getInstanceEnabled() ? 
"ENABLED" : "DISABLED", clusterName); return false; } diff --git a/helix-core/src/main/java/org/apache/helix/model/ResourceAssignment.java b/helix-core/src/main/java/org/apache/helix/model/ResourceAssignment.java index 4b56057cd9..b347cea038 100644 --- a/helix-core/src/main/java/org/apache/helix/model/ResourceAssignment.java +++ b/helix-core/src/main/java/org/apache/helix/model/ResourceAssignment.java @@ -39,7 +39,6 @@ * can be in s1. */ public class ResourceAssignment extends HelixProperty { - /** * Initialize an empty mapping * @param resourceName the resource being mapped diff --git a/helix-core/src/main/java/org/apache/helix/task/AbstractTaskDispatcher.java b/helix-core/src/main/java/org/apache/helix/task/AbstractTaskDispatcher.java index 8da61958cf..819ee5d72b 100644 --- a/helix-core/src/main/java/org/apache/helix/task/AbstractTaskDispatcher.java +++ b/helix-core/src/main/java/org/apache/helix/task/AbstractTaskDispatcher.java @@ -647,7 +647,8 @@ protected void handleAdditionalTaskAssignment( int jobCfgLimitation = jobCfg.getNumConcurrentTasksPerInstance() - assignedPartitions.get(instance).size(); // 2. 
throttled by participant capacity - int participantCapacity = cache.getInstanceConfigMap().get(instance).getMaxConcurrentTask(); + int participantCapacity = + cache.getAssignableInstanceConfigMap().get(instance).getMaxConcurrentTask(); if (participantCapacity == InstanceConfig.MAX_CONCURRENT_TASK_NOT_SET) { participantCapacity = cache.getClusterConfig().getMaxConcurrentTaskPerInstance(); } diff --git a/helix-core/src/main/java/org/apache/helix/task/JobDispatcher.java b/helix-core/src/main/java/org/apache/helix/task/JobDispatcher.java index 4b0e00f817..fd22a8e1fd 100644 --- a/helix-core/src/main/java/org/apache/helix/task/JobDispatcher.java +++ b/helix-core/src/main/java/org/apache/helix/task/JobDispatcher.java @@ -141,8 +141,8 @@ public ResourceAssignment processJobStatusUpdateAndAssignment(String jobName, // Will contain the list of partitions that must be explicitly dropped from the ideal state that // is stored in zk. Set liveInstances = - jobCfg.getInstanceGroupTag() == null ? _dataProvider.getEnabledLiveInstances() - : _dataProvider.getEnabledLiveInstancesWithTag(jobCfg.getInstanceGroupTag()); + jobCfg.getInstanceGroupTag() == null ? 
_dataProvider.getAssignableEnabledLiveInstances() + : _dataProvider.getAssignableEnabledLiveInstancesWithTag(jobCfg.getInstanceGroupTag()); if (liveInstances.isEmpty()) { LOG.error("No available instance found for job: {}", jobName); @@ -163,7 +163,7 @@ public ResourceAssignment processJobStatusUpdateAndAssignment(String jobName, if (jobTgtState == TargetState.STOP) { // If the assigned instance is no longer live, so mark it as DROPPED in the context markPartitionsWithoutLiveInstance(jobCtx, liveInstances); - + if (jobState != TaskState.NOT_STARTED && TaskUtil.checkJobStopped(jobCtx)) { workflowCtx.setJobState(jobName, TaskState.STOPPED); } else { diff --git a/helix-core/src/main/java/org/apache/helix/tools/ClusterExternalViewVerifier.java b/helix-core/src/main/java/org/apache/helix/tools/ClusterExternalViewVerifier.java index da9324acfd..c7d46b8e4f 100644 --- a/helix-core/src/main/java/org/apache/helix/tools/ClusterExternalViewVerifier.java +++ b/helix-core/src/main/java/org/apache/helix/tools/ClusterExternalViewVerifier.java @@ -146,7 +146,7 @@ public boolean verify() throws Exception { cache.refresh(_accessor); List liveInstances = new ArrayList(); - liveInstances.addAll(cache.getLiveInstances().keySet()); + liveInstances.addAll(cache.getAssignableLiveInstances().keySet()); boolean success = verifyLiveNodes(liveInstances); if (!success) { LOG.info("liveNodes not match, expect: " + _expectSortedLiveNodes + ", actual: " diff --git a/helix-core/src/main/java/org/apache/helix/tools/ClusterVerifiers/StrictMatchExternalViewVerifier.java b/helix-core/src/main/java/org/apache/helix/tools/ClusterVerifiers/StrictMatchExternalViewVerifier.java index ca47c16b9e..d0da9ba8eb 100644 --- a/helix-core/src/main/java/org/apache/helix/tools/ClusterVerifiers/StrictMatchExternalViewVerifier.java +++ b/helix-core/src/main/java/org/apache/helix/tools/ClusterVerifiers/StrictMatchExternalViewVerifier.java @@ -330,16 +330,16 @@ private Map> computeIdealPartitionState( Map> 
idealPartitionState = new HashMap<>(); for (String partition : idealState.getPartitionSet()) { - List preferenceList = AbstractRebalancer - .getPreferenceList(new Partition(partition), idealState, cache.getEnabledLiveInstances()); + List preferenceList = AbstractRebalancer.getPreferenceList(new Partition(partition), + idealState, cache.getAssignableEnabledLiveInstances()); Map idealMapping; if (_isDeactivatedNodeAware) { - idealMapping = HelixUtil - .computeIdealMapping(preferenceList, stateModelDef, cache.getLiveInstances().keySet(), + idealMapping = HelixUtil.computeIdealMapping(preferenceList, stateModelDef, + cache.getAssignableLiveInstances().keySet(), cache.getDisabledInstancesForPartition(idealState.getResourceName(), partition)); } else { - idealMapping = HelixUtil - .computeIdealMapping(preferenceList, stateModelDef, cache.getEnabledLiveInstances(), + idealMapping = HelixUtil.computeIdealMapping(preferenceList, stateModelDef, + cache.getAssignableEnabledLiveInstances(), Collections.emptySet()); } idealPartitionState.put(partition, idealMapping); diff --git a/helix-core/src/main/java/org/apache/helix/util/HelixUtil.java b/helix-core/src/main/java/org/apache/helix/util/HelixUtil.java index 88c33f608e..4a3d49b73a 100644 --- a/helix-core/src/main/java/org/apache/helix/util/HelixUtil.java +++ b/helix-core/src/main/java/org/apache/helix/util/HelixUtil.java @@ -294,9 +294,9 @@ private static Map getAssignmentForWagedFullAutoImpl .collect(Collectors.toMap(InstanceConfig::getInstanceName, Function.identity()))); // For LiveInstances, we must preserve the existing session IDs // So read LiveInstance objects from the cluster and do a "retainAll" on them - // liveInstanceMap is an unmodifiableMap instances, so we filter using a stream - Map liveInstanceMap = dataProvider.getLiveInstances(); - List filteredLiveInstances = liveInstanceMap.entrySet().stream() + // assignableLiveInstanceMap is an unmodifiableMap instances, so we filter using a stream + Map 
assignableLiveInstanceMap = dataProvider.getAssignableLiveInstances(); + List filteredLiveInstances = assignableLiveInstanceMap.entrySet().stream() .filter(entry -> liveInstances.contains(entry.getKey())).map(Map.Entry::getValue) .collect(Collectors.toList()); // Synthetically create LiveInstance objects that are passed in as the parameter diff --git a/helix-core/src/test/java/org/apache/helix/controller/changedetector/trimmer/TestHelixPropoertyTimmer.java b/helix-core/src/test/java/org/apache/helix/controller/changedetector/trimmer/TestHelixPropoertyTimmer.java index 80679310a1..6f6cdc0a7c 100644 --- a/helix-core/src/test/java/org/apache/helix/controller/changedetector/trimmer/TestHelixPropoertyTimmer.java +++ b/helix-core/src/test/java/org/apache/helix/controller/changedetector/trimmer/TestHelixPropoertyTimmer.java @@ -111,11 +111,11 @@ private ResourceControllerDataProvider getMockDataProvider( ResourceControllerDataProvider dataProvider = Mockito.mock(ResourceControllerDataProvider.class); when(dataProvider.getRefreshedChangeTypes()).thenReturn(changeTypes); - when(dataProvider.getInstanceConfigMap()).thenReturn(instanceConfigMap); + when(dataProvider.getAssignableInstanceConfigMap()).thenReturn(instanceConfigMap); when(dataProvider.getIdealStates()).thenReturn(idealStateMap); when(dataProvider.getResourceConfigMap()).thenReturn(resourceConfigMap); when(dataProvider.getClusterConfig()).thenReturn(clusterConfig); - when(dataProvider.getLiveInstances()).thenReturn(Collections.emptyMap()); + when(dataProvider.getAssignableLiveInstances()).thenReturn(Collections.emptyMap()); return dataProvider; } diff --git a/helix-core/src/test/java/org/apache/helix/controller/rebalancer/TestAutoRebalanceStrategy.java b/helix-core/src/test/java/org/apache/helix/controller/rebalancer/TestAutoRebalanceStrategy.java index 32a131d483..905c5552bd 100644 --- a/helix-core/src/test/java/org/apache/helix/controller/rebalancer/TestAutoRebalanceStrategy.java +++ 
b/helix-core/src/test/java/org/apache/helix/controller/rebalancer/TestAutoRebalanceStrategy.java @@ -243,7 +243,7 @@ private Map> getMapping(final Map assignment = new AutoRebalancer() - .computeBestPossibleStateForPartition(cache.getLiveInstances().keySet(), _stateModelDef, + .computeBestPossibleStateForPartition(cache.getAssignableLiveInstances().keySet(), _stateModelDef, preferenceList, currentStateOutput, disabled, is, clusterConfig, p, MonitoredAbnormalResolver.DUMMY_STATE_RESOLVER); mapResult.put(partition, assignment); diff --git a/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/TestWagedRebalancer.java b/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/TestWagedRebalancer.java index 000978ef1a..951e0e3c52 100644 --- a/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/TestWagedRebalancer.java +++ b/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/TestWagedRebalancer.java @@ -20,7 +20,7 @@ */ import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableSet; + import java.io.IOException; import java.util.Arrays; import java.util.Collections; @@ -108,16 +108,21 @@ protected ResourceControllerDataProvider setupClusterDataCache() throws IOExcept _instances.add(instanceName); // 1. Set up the default instance information with capacity configuration. InstanceConfig testInstanceConfig = createMockInstanceConfig(instanceName); - Map instanceConfigMap = testCache.getInstanceConfigMap(); + Map instanceConfigMap = testCache.getAssignableInstanceConfigMap(); instanceConfigMap.put(instanceName, testInstanceConfig); + when(testCache.getAssignableInstanceConfigMap()).thenReturn(instanceConfigMap); when(testCache.getInstanceConfigMap()).thenReturn(instanceConfigMap); // 2. Mock the live instance node for the default instance. 
LiveInstance testLiveInstance = createMockLiveInstance(instanceName); - Map liveInstanceMap = testCache.getLiveInstances(); + Map liveInstanceMap = testCache.getAssignableLiveInstances(); liveInstanceMap.put(instanceName, testLiveInstance); + when(testCache.getAssignableLiveInstances()).thenReturn(liveInstanceMap); when(testCache.getLiveInstances()).thenReturn(liveInstanceMap); + when(testCache.getAssignableEnabledInstances()).thenReturn(liveInstanceMap.keySet()); when(testCache.getEnabledInstances()).thenReturn(liveInstanceMap.keySet()); + when(testCache.getAssignableEnabledLiveInstances()).thenReturn(liveInstanceMap.keySet()); when(testCache.getEnabledLiveInstances()).thenReturn(liveInstanceMap.keySet()); + when(testCache.getAssignableInstances()).thenReturn(_instances); when(testCache.getAllInstances()).thenReturn(_instances); } @@ -370,7 +375,7 @@ public void testInvalidClusterStatus() throws IOException, HelixRebalanceExcepti Collectors.toMap(resourceName -> resourceName, Resource::new)); try { rebalancer.computeBestPossibleAssignment(clusterData, resourceMap, - clusterData.getEnabledLiveInstances(), new CurrentStateOutput(), _algorithm); + clusterData.getAssignableEnabledLiveInstances(), new CurrentStateOutput(), _algorithm); Assert.fail("Rebalance shall fail."); } catch (HelixRebalanceException ex) { Assert.assertEquals(ex.getFailureType(), HelixRebalanceException.Type.FAILED_TO_CALCULATE); @@ -434,7 +439,7 @@ public void testAlgorithmException() // Calculation will fail try { rebalancer.computeBestPossibleAssignment(clusterData, resourceMap, - clusterData.getEnabledLiveInstances(), new CurrentStateOutput(), badAlgorithm); + clusterData.getAssignableEnabledLiveInstances(), new CurrentStateOutput(), badAlgorithm); Assert.fail("Rebalance shall fail."); } catch (HelixRebalanceException ex) { Assert.assertEquals(ex.getFailureType(), HelixRebalanceException.Type.FAILED_TO_CALCULATE); @@ -601,6 +606,11 @@ public void testEmergencyRebalance() throws IOException, 
HelixRebalanceException String offlinePartition = _partitionNames.get(0); String offlineState = "MASTER"; String offlineInstance = "offlineInstance"; + InstanceConfig offlineInstanceConfig = createMockInstanceConfig(offlineInstance); + Map instanceConfigMap = clusterData.getAssignableInstanceConfigMap(); + instanceConfigMap.put(offlineInstance, offlineInstanceConfig); + when(clusterData.getAssignableInstanceConfigMap()).thenReturn(instanceConfigMap); + when(clusterData.getInstanceConfigMap()).thenReturn(instanceConfigMap); for (Partition partition : bestPossibleAssignment.get(offlineResource).getMappedPartitions()) { if (partition.getPartitionName().equals(offlinePartition)) { bestPossibleAssignment.get(offlineResource) @@ -649,12 +659,13 @@ public void testRebalanceOverwriteTrigger() throws IOException, HelixRebalanceEx Set instances = new HashSet<>(_instances); String offlineInstance = "offlineInstance"; instances.add(offlineInstance); - when(clusterData.getAllInstances()).thenReturn(instances); + when(clusterData.getAssignableInstances()).thenReturn(instances); Map instanceOfflineTimeMap = new HashMap<>(); instanceOfflineTimeMap.put(offlineInstance, System.currentTimeMillis() + Integer.MAX_VALUE); when(clusterData.getInstanceOfflineTimeMap()).thenReturn(instanceOfflineTimeMap); - Map instanceConfigMap = clusterData.getInstanceConfigMap(); + Map instanceConfigMap = clusterData.getAssignableInstanceConfigMap(); instanceConfigMap.put(offlineInstance, createMockInstanceConfig(offlineInstance)); + when(clusterData.getAssignableInstanceConfigMap()).thenReturn(instanceConfigMap); when(clusterData.getInstanceConfigMap()).thenReturn(instanceConfigMap); // Set minActiveReplica to 0 so that requireRebalanceOverwrite returns false @@ -737,15 +748,16 @@ public void testRebalanceOverwrite() throws HelixRebalanceException, IOException // force create a fake offlineInstance that's in delay window Set instances = new HashSet<>(_instances); instances.add(offlineInstance); - 
when(clusterData.getAllInstances()).thenReturn(instances); - when(clusterData.getEnabledInstances()).thenReturn(instances); - when(clusterData.getEnabledLiveInstances()).thenReturn( + when(clusterData.getAssignableInstances()).thenReturn(instances); + when(clusterData.getAssignableEnabledInstances()).thenReturn(instances); + when(clusterData.getAssignableEnabledLiveInstances()).thenReturn( new HashSet<>(Arrays.asList(instance0, instance1, instance2))); Map instanceOfflineTimeMap = new HashMap<>(); instanceOfflineTimeMap.put(offlineInstance, System.currentTimeMillis() + Integer.MAX_VALUE); when(clusterData.getInstanceOfflineTimeMap()).thenReturn(instanceOfflineTimeMap); - Map instanceConfigMap = clusterData.getInstanceConfigMap(); + Map instanceConfigMap = clusterData.getAssignableInstanceConfigMap(); instanceConfigMap.put(offlineInstance, createMockInstanceConfig(offlineInstance)); + when(clusterData.getAssignableInstanceConfigMap()).thenReturn(instanceConfigMap); when(clusterData.getInstanceConfigMap()).thenReturn(instanceConfigMap); Map isMap = new HashMap<>(); @@ -881,10 +893,11 @@ public void testInstanceCapacityProvider() throws IOException, HelixRebalanceExc // force create a fake offlineInstance that's in delay window Set instances = new HashSet<>(_instances); - when(clusterData.getAllInstances()).thenReturn(instances); - when(clusterData.getEnabledInstances()).thenReturn(instances); - when(clusterData.getEnabledLiveInstances()).thenReturn(instances); - Map instanceConfigMap = clusterData.getInstanceConfigMap(); + when(clusterData.getAssignableInstances()).thenReturn(instances); + when(clusterData.getAssignableEnabledInstances()).thenReturn(instances); + when(clusterData.getAssignableEnabledLiveInstances()).thenReturn(instances); + Map instanceConfigMap = clusterData.getAssignableInstanceConfigMap(); + when(clusterData.getAssignableInstanceConfigMap()).thenReturn(instanceConfigMap); when(clusterData.getInstanceConfigMap()).thenReturn(instanceConfigMap); Map 
isMap = new HashMap<>(); diff --git a/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/TestWagedRebalancerMetrics.java b/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/TestWagedRebalancerMetrics.java index bdc677bde4..c5c7b560c6 100644 --- a/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/TestWagedRebalancerMetrics.java +++ b/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/TestWagedRebalancerMetrics.java @@ -256,16 +256,21 @@ protected ResourceControllerDataProvider setupClusterDataCache() throws IOExcept _instances.add(instanceName); // 1. Set up the default instance information with capacity configuration. InstanceConfig testInstanceConfig = createMockInstanceConfig(instanceName); - Map instanceConfigMap = testCache.getInstanceConfigMap(); + Map instanceConfigMap = testCache.getAssignableInstanceConfigMap(); instanceConfigMap.put(instanceName, testInstanceConfig); + when(testCache.getAssignableInstanceConfigMap()).thenReturn(instanceConfigMap); when(testCache.getInstanceConfigMap()).thenReturn(instanceConfigMap); // 2. Mock the live instance node for the default instance. 
LiveInstance testLiveInstance = createMockLiveInstance(instanceName); - Map liveInstanceMap = testCache.getLiveInstances(); + Map liveInstanceMap = testCache.getAssignableLiveInstances(); liveInstanceMap.put(instanceName, testLiveInstance); + when(testCache.getAssignableLiveInstances()).thenReturn(liveInstanceMap); when(testCache.getLiveInstances()).thenReturn(liveInstanceMap); + when(testCache.getAssignableEnabledInstances()).thenReturn(liveInstanceMap.keySet()); when(testCache.getEnabledInstances()).thenReturn(liveInstanceMap.keySet()); + when(testCache.getAssignableEnabledLiveInstances()).thenReturn(liveInstanceMap.keySet()); when(testCache.getEnabledLiveInstances()).thenReturn(liveInstanceMap.keySet()); + when(testCache.getAssignableInstances()).thenReturn(_instances); when(testCache.getAllInstances()).thenReturn(_instances); } diff --git a/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestPartitionMovementConstraint.java b/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestPartitionMovementConstraint.java index 16c1994702..9b4d1de15f 100644 --- a/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestPartitionMovementConstraint.java +++ b/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/constraints/TestPartitionMovementConstraint.java @@ -53,6 +53,7 @@ public void init() { when(_testReplica.getResourceName()).thenReturn(RESOURCE); when(_testReplica.getPartitionName()).thenReturn(PARTITION); when(_testNode.getInstanceName()).thenReturn(INSTANCE); + when(_testNode.getLogicalId()).thenReturn(INSTANCE); } @Test @@ -104,6 +105,7 @@ public void testGetAssignmentScore() { // when the replica's state matches with best possible, allocation matches with baseline when(testAssignableNode.getInstanceName()).thenReturn(instanceNameA); + when(testAssignableNode.getLogicalId()).thenReturn(instanceNameA); 
when(_testReplica.getReplicaState()).thenReturn("Master"); verifyScore(_baselineInfluenceConstraint, testAssignableNode, _testReplica, _clusterContext, 0.5, 0.5); @@ -112,6 +114,7 @@ public void testGetAssignmentScore() { // when the replica's allocation matches with best possible only when(testAssignableNode.getInstanceName()).thenReturn(instanceNameB); + when(testAssignableNode.getLogicalId()).thenReturn(instanceNameB); when(_testReplica.getReplicaState()).thenReturn("Master"); verifyScore(_baselineInfluenceConstraint, testAssignableNode, _testReplica, _clusterContext, 0.0, 0.0); @@ -120,6 +123,7 @@ public void testGetAssignmentScore() { // when the replica's state matches with baseline only when(testAssignableNode.getInstanceName()).thenReturn(instanceNameC); + when(testAssignableNode.getLogicalId()).thenReturn(instanceNameC); when(_testReplica.getReplicaState()).thenReturn("Master"); verifyScore(_baselineInfluenceConstraint, testAssignableNode, _testReplica, _clusterContext, 1.0, 1.0); @@ -128,6 +132,7 @@ public void testGetAssignmentScore() { // when the replica's allocation matches with baseline only when(testAssignableNode.getInstanceName()).thenReturn(instanceNameC); + when(testAssignableNode.getLogicalId()).thenReturn(instanceNameC); when(_testReplica.getReplicaState()).thenReturn("Slave"); verifyScore(_baselineInfluenceConstraint, testAssignableNode, _testReplica, _clusterContext, 0.5, 0.5); diff --git a/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/model/AbstractTestClusterModel.java b/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/model/AbstractTestClusterModel.java index b9c4ce39d4..c9deb792de 100644 --- a/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/model/AbstractTestClusterModel.java +++ b/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/model/AbstractTestClusterModel.java @@ -103,6 +103,7 @@ protected ResourceControllerDataProvider setupClusterDataCache() 
throws IOExcept testInstanceConfig.setInstanceEnabledForPartition("TestResource", "TestPartition", false); Map instanceConfigMap = new HashMap<>(); instanceConfigMap.put(_testInstanceId, testInstanceConfig); + when(testCache.getAssignableInstanceConfigMap()).thenReturn(instanceConfigMap); when(testCache.getInstanceConfigMap()).thenReturn(instanceConfigMap); // 2. Set up the basic cluster configuration. @@ -121,7 +122,7 @@ protected ResourceControllerDataProvider setupClusterDataCache() throws IOExcept LiveInstance testLiveInstance = createMockLiveInstance(_testInstanceId); Map liveInstanceMap = new HashMap<>(); liveInstanceMap.put(_testInstanceId, testLiveInstance); - when(testCache.getLiveInstances()).thenReturn(liveInstanceMap); + when(testCache.getAssignableLiveInstances()).thenReturn(liveInstanceMap); // 4. Mock two resources, each with 2 partitions on the default instance. // The instance will have the following partitions assigned @@ -288,7 +289,7 @@ protected Set generateReplicas(ResourceControllerDataProvider protected Set generateNodes(ResourceControllerDataProvider testCache) { Set nodeSet = new HashSet<>(); - testCache.getInstanceConfigMap().values().forEach(config -> nodeSet + testCache.getAssignableInstanceConfigMap().values().forEach(config -> nodeSet .add(new AssignableNode(testCache.getClusterConfig(), config, config.getInstanceName()))); return nodeSet; } diff --git a/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/model/ClusterModelTestHelper.java b/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/model/ClusterModelTestHelper.java index e13fb39798..d63f05c81f 100644 --- a/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/model/ClusterModelTestHelper.java +++ b/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/model/ClusterModelTestHelper.java @@ -63,7 +63,7 @@ private ClusterModel getClusterHelper(ResourceControllerDataProvider testCache) Map instanceConfigMap = new 
HashMap<>(); instanceConfigMap.put(TEST_INSTANCE_ID_1, testInstanceConfig1); instanceConfigMap.put(TEST_INSTANCE_ID_2, testInstanceConfig2); - when(testCache.getInstanceConfigMap()).thenReturn(instanceConfigMap); + when(testCache.getAssignableInstanceConfigMap()).thenReturn(instanceConfigMap); Set assignableReplicas = generateReplicas(testCache); Set assignableNodes = generateNodes(testCache); diff --git a/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/model/TestAssignableNode.java b/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/model/TestAssignableNode.java index 9cbfc2560b..0f751b1868 100644 --- a/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/model/TestAssignableNode.java +++ b/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/model/TestAssignableNode.java @@ -64,7 +64,7 @@ public void testNormalUsage() throws IOException { expectedCapacityMap.put("item3", 30); AssignableNode assignableNode = new AssignableNode(testCache.getClusterConfig(), - testCache.getInstanceConfigMap().get(_testInstanceId), _testInstanceId); + testCache.getAssignableInstanceConfigMap().get(_testInstanceId), _testInstanceId); assignableNode.assignInitBatch(assignmentSet); Assert.assertEquals(assignableNode.getAssignedPartitionsMap(), expectedAssignment); Assert.assertEquals(assignableNode.getAssignedReplicaCount(), 4); @@ -177,7 +177,7 @@ public void testReleaseNoPartition() throws IOException { ResourceControllerDataProvider testCache = setupClusterDataCache(); AssignableNode assignableNode = new AssignableNode(testCache.getClusterConfig(), - testCache.getInstanceConfigMap().get(_testInstanceId), _testInstanceId); + testCache.getAssignableInstanceConfigMap().get(_testInstanceId), _testInstanceId); AssignableReplica removingReplica = new AssignableReplica(testCache.getClusterConfig(), testCache.getResourceConfig(_resourceNames.get(1)), _partitionNames.get(2) + "non-exist", "MASTER", 1); @@ -192,7 
+192,7 @@ public void testAssignDuplicateReplica() throws IOException { Set assignmentSet = generateReplicas(testCache); AssignableNode assignableNode = new AssignableNode(testCache.getClusterConfig(), - testCache.getInstanceConfigMap().get(_testInstanceId), _testInstanceId); + testCache.getAssignableInstanceConfigMap().get(_testInstanceId), _testInstanceId); assignableNode.assignInitBatch(assignmentSet); AssignableReplica duplicateReplica = new AssignableReplica(testCache.getClusterConfig(), testCache.getResourceConfig(_resourceNames.get(0)), _partitionNames.get(0), "SLAVE", 2); @@ -213,10 +213,10 @@ public void testParseFaultZoneNotFound() throws IOException { testInstanceConfig.setDomain("instance=testInstance"); Map instanceConfigMap = new HashMap<>(); instanceConfigMap.put(_testInstanceId, testInstanceConfig); - when(testCache.getInstanceConfigMap()).thenReturn(instanceConfigMap); + when(testCache.getAssignableInstanceConfigMap()).thenReturn(instanceConfigMap); AssignableNode node = new AssignableNode(testCache.getClusterConfig(), - testCache.getInstanceConfigMap().get(_testInstanceId), _testInstanceId); + testCache.getAssignableInstanceConfigMap().get(_testInstanceId), _testInstanceId); Assert.assertEquals(node.getFaultZone(), "Helix_default_zone"); } @@ -234,10 +234,10 @@ public void testParseFaultZone() throws IOException { testInstanceConfig.setDomain("zone=2, instance=testInstance"); Map instanceConfigMap = new HashMap<>(); instanceConfigMap.put(_testInstanceId, testInstanceConfig); - when(testCache.getInstanceConfigMap()).thenReturn(instanceConfigMap); + when(testCache.getAssignableInstanceConfigMap()).thenReturn(instanceConfigMap); AssignableNode assignableNode = new AssignableNode(testCache.getClusterConfig(), - testCache.getInstanceConfigMap().get(_testInstanceId), _testInstanceId); + testCache.getAssignableInstanceConfigMap().get(_testInstanceId), _testInstanceId); Assert.assertEquals(assignableNode.getFaultZone(), "2"); @@ -251,10 +251,10 @@ public 
void testParseFaultZone() throws IOException { testInstanceConfig.setDomain("zone=2, instance=testInstance"); instanceConfigMap = new HashMap<>(); instanceConfigMap.put(_testInstanceId, testInstanceConfig); - when(testCache.getInstanceConfigMap()).thenReturn(instanceConfigMap); + when(testCache.getAssignableInstanceConfigMap()).thenReturn(instanceConfigMap); assignableNode = new AssignableNode(testCache.getClusterConfig(), - testCache.getInstanceConfigMap().get(_testInstanceId), _testInstanceId); + testCache.getAssignableInstanceConfigMap().get(_testInstanceId), _testInstanceId); Assert.assertEquals(assignableNode.getFaultZone(), "2/testInstance"); @@ -268,10 +268,10 @@ public void testParseFaultZone() throws IOException { testInstanceConfig.setDomain("rack=3, zone=2, instance=testInstanceConfigId"); instanceConfigMap = new HashMap<>(); instanceConfigMap.put(_testInstanceId, testInstanceConfig); - when(testCache.getInstanceConfigMap()).thenReturn(instanceConfigMap); + when(testCache.getAssignableInstanceConfigMap()).thenReturn(instanceConfigMap); when(testCache.getClusterConfig()).thenReturn(testClusterConfig); assignableNode = new AssignableNode(testCache.getClusterConfig(), - testCache.getInstanceConfigMap().get(_testInstanceId), _testInstanceId); + testCache.getAssignableInstanceConfigMap().get(_testInstanceId), _testInstanceId); Assert.assertEquals(assignableNode.getFaultZone(), "3/2"); } diff --git a/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/model/TestClusterModelProvider.java b/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/model/TestClusterModelProvider.java index 500cfb0b6c..34582d600d 100644 --- a/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/model/TestClusterModelProvider.java +++ b/helix-core/src/test/java/org/apache/helix/controller/rebalancer/waged/model/TestClusterModelProvider.java @@ -79,14 +79,15 @@ protected ResourceControllerDataProvider setupClusterDataCache() throws 
IOExcept _instances.add(instanceName); // 1. Set up the default instance information with capacity configuration. InstanceConfig testInstanceConfig = createMockInstanceConfig(instanceName); - Map instanceConfigMap = testCache.getInstanceConfigMap(); + Map instanceConfigMap = testCache.getAssignableInstanceConfigMap(); instanceConfigMap.put(instanceName, testInstanceConfig); + when(testCache.getAssignableInstanceConfigMap()).thenReturn(instanceConfigMap); when(testCache.getInstanceConfigMap()).thenReturn(instanceConfigMap); // 2. Mock the live instance node for the default instance. LiveInstance testLiveInstance = createMockLiveInstance(instanceName); - Map liveInstanceMap = testCache.getLiveInstances(); + Map liveInstanceMap = testCache.getAssignableLiveInstances(); liveInstanceMap.put(instanceName, testLiveInstance); - when(testCache.getLiveInstances()).thenReturn(liveInstanceMap); + when(testCache.getAssignableLiveInstances()).thenReturn(liveInstanceMap); } return testCache; @@ -104,8 +105,8 @@ public void testFindToBeAssignedReplicasForMinActiveReplica() throws IOException Set activeInstances = new HashSet<>(); activeInstances.add(instance1); activeInstances.add(instance2); - when(testCache.getLiveInstances()).thenReturn(liveInstanceMap); - when(testCache.getEnabledLiveInstances()).thenReturn(activeInstances); + when(testCache.getAssignableLiveInstances()).thenReturn(liveInstanceMap); + when(testCache.getAssignableEnabledLiveInstances()).thenReturn(activeInstances); // test 0, empty input Assert.assertEquals( @@ -142,8 +143,8 @@ public void testFindToBeAssignedReplicasForMinActiveReplica() throws IOException // test 2, no additional replica to be assigned testCache = setupClusterDataCache(); - when(testCache.getLiveInstances()).thenReturn(liveInstanceMap); - when(testCache.getEnabledLiveInstances()).thenReturn(activeInstances); + when(testCache.getAssignableLiveInstances()).thenReturn(liveInstanceMap); + 
when(testCache.getAssignableEnabledLiveInstances()).thenReturn(activeInstances); input = ImmutableMap.of( _resourceNames.get(0), ImmutableMap.of( @@ -167,8 +168,8 @@ public void testFindToBeAssignedReplicasForMinActiveReplica() throws IOException // test 3, minActiveReplica==2, two partitions falling short testCache = setupClusterDataCache(); - when(testCache.getLiveInstances()).thenReturn(liveInstanceMap); - when(testCache.getEnabledLiveInstances()).thenReturn(activeInstances); + when(testCache.getAssignableLiveInstances()).thenReturn(liveInstanceMap); + when(testCache.getAssignableEnabledLiveInstances()).thenReturn(activeInstances); input = ImmutableMap.of( _resourceNames.get(0), ImmutableMap.of( @@ -205,8 +206,8 @@ public void testClusterModelForDelayedRebalanceOverwrite() throws IOException { Set activeInstances = new HashSet<>(); activeInstances.add(instance1); activeInstances.add(instance2); - when(testCache.getLiveInstances()).thenReturn(liveInstanceMap); - when(testCache.getEnabledLiveInstances()).thenReturn(activeInstances); + when(testCache.getAssignableLiveInstances()).thenReturn(liveInstanceMap); + when(testCache.getAssignableEnabledLiveInstances()).thenReturn(activeInstances); // test 1, one partition under minActiveReplica Map>> input = ImmutableMap.of( @@ -245,8 +246,8 @@ public void testClusterModelForDelayedRebalanceOverwrite() throws IOException { // test 2, minActiveReplica==2, three partitions falling short testCache = setupClusterDataCache(); - when(testCache.getLiveInstances()).thenReturn(liveInstanceMap); - when(testCache.getEnabledLiveInstances()).thenReturn(activeInstances); + when(testCache.getAssignableLiveInstances()).thenReturn(liveInstanceMap); + when(testCache.getAssignableEnabledLiveInstances()).thenReturn(activeInstances); input = ImmutableMap.of( _resourceNames.get(0), ImmutableMap.of( @@ -413,7 +414,7 @@ public void testGenerateClusterModel() throws IOException { .allMatch(replicaSet -> replicaSet.size() == 4)); // Adjust instance 
fault zone, so they have different fault zones. - testCache.getInstanceConfigMap().values().stream() + testCache.getAssignableInstanceConfigMap().values().stream() .forEach(config -> config.setZoneId(config.getInstanceName())); clusterModel = ClusterModelProvider.generateClusterModelForBaseline(testCache, _resourceNames.stream() @@ -576,7 +577,7 @@ public void testGenerateClusterModelForPartialRebalance() throws IOException { Assert.assertEquals(clusterModel.getAssignableReplicaMap().size(), 0); // Adjust instance fault zone, so they have different fault zones. - testCache.getInstanceConfigMap().values().stream() + testCache.getAssignableInstanceConfigMap().values().stream() .forEach(config -> config.setZoneId(config.getInstanceName())); // 2. test with a pair of identical best possible assignment and baseline assignment diff --git a/helix-core/src/test/java/org/apache/helix/controller/stages/TestBestPossibleCalcStageCompatibility.java b/helix-core/src/test/java/org/apache/helix/controller/stages/TestBestPossibleCalcStageCompatibility.java index 4d8fb8669f..0027f8e4ef 100644 --- a/helix-core/src/test/java/org/apache/helix/controller/stages/TestBestPossibleCalcStageCompatibility.java +++ b/helix-core/src/test/java/org/apache/helix/controller/stages/TestBestPossibleCalcStageCompatibility.java @@ -49,8 +49,9 @@ public void testSemiAutoModeCompatibility() { "testResourceName" }; setupIdealStateDeprecated(5, resources, 10, 1, IdealStateModeProperty.AUTO); - setupLiveInstances(5); setupStateModel(); + setupInstances(5); + setupLiveInstances(5); Map resourceMap = getResourceMap(); CurrentStateOutput currentStateOutput = new CurrentStateOutput(); diff --git a/helix-core/src/test/java/org/apache/helix/controller/stages/TestBestPossibleStateCalcStage.java b/helix-core/src/test/java/org/apache/helix/controller/stages/TestBestPossibleStateCalcStage.java index 7289dc3f8b..e33dc9f5da 100644 --- 
a/helix-core/src/test/java/org/apache/helix/controller/stages/TestBestPossibleStateCalcStage.java +++ b/helix-core/src/test/java/org/apache/helix/controller/stages/TestBestPossibleStateCalcStage.java @@ -50,6 +50,7 @@ public void testSimple() { BuiltInStateModelDefinitions.MasterSlave.name()); setupLiveInstances(5); setupStateModel(); + setupInstances(5); Map resourceMap = getResourceMap(resources, numPartition, BuiltInStateModelDefinitions.MasterSlave.name()); diff --git a/helix-core/src/test/java/org/apache/helix/controller/stages/TestCancellationMessageGeneration.java b/helix-core/src/test/java/org/apache/helix/controller/stages/TestCancellationMessageGeneration.java index 2fcfc3e59c..7b891522cf 100644 --- a/helix-core/src/test/java/org/apache/helix/controller/stages/TestCancellationMessageGeneration.java +++ b/helix-core/src/test/java/org/apache/helix/controller/stages/TestCancellationMessageGeneration.java @@ -28,7 +28,6 @@ import org.apache.helix.HelixDefinedState; import org.apache.helix.HelixManager; import org.apache.helix.controller.common.PartitionStateMap; -import org.apache.helix.controller.common.ResourcesStateMap; import org.apache.helix.controller.dataproviders.BaseControllerDataProvider; import org.apache.helix.model.ClusterConfig; import org.apache.helix.model.LiveInstance; diff --git a/helix-core/src/test/java/org/apache/helix/controller/stages/TestIntermediateStateCalcStage.java b/helix-core/src/test/java/org/apache/helix/controller/stages/TestIntermediateStateCalcStage.java index 60281e65b8..7da3d64c25 100644 --- a/helix-core/src/test/java/org/apache/helix/controller/stages/TestIntermediateStateCalcStage.java +++ b/helix-core/src/test/java/org/apache/helix/controller/stages/TestIntermediateStateCalcStage.java @@ -572,6 +572,7 @@ private void preSetup(String[] resources, int numOfLiveInstances, int numOfRepli setupIdealState(numOfLiveInstances, resources, numOfLiveInstances, numOfReplicas, IdealState.RebalanceMode.FULL_AUTO, "OnlineOffline"); 
setupStateModel(); + setupInstances(numOfLiveInstances); setupLiveInstances(numOfLiveInstances); // Set up cluster configs diff --git a/helix-core/src/test/java/org/apache/helix/controller/stages/TestRebalancePipeline.java b/helix-core/src/test/java/org/apache/helix/controller/stages/TestRebalancePipeline.java index 40fa82030a..d3c41018ce 100644 --- a/helix-core/src/test/java/org/apache/helix/controller/stages/TestRebalancePipeline.java +++ b/helix-core/src/test/java/org/apache/helix/controller/stages/TestRebalancePipeline.java @@ -57,8 +57,10 @@ public class TestRebalancePipeline extends ZkUnitTestBase { @Test public void testDuplicateMsg() throws Exception { String clusterName = "CLUSTER_" + _className + "_dup"; - System.out.println("START " + clusterName + " at " + new Date(System.currentTimeMillis())); + HelixAdmin admin = new ZKHelixAdmin(_gZkClient); + System.out.println("START " + clusterName + " at " + new Date(System.currentTimeMillis())); + admin.addCluster(clusterName); HelixDataAccessor accessor = new ZKHelixDataAccessor(clusterName, new ZkBaseDataAccessor<>(_gZkClient)); refreshClusterConfig(clusterName, accessor); @@ -82,10 +84,11 @@ public void testDuplicateMsg() throws Exception { setupIdealState(clusterName, new int[] { 0 }, resourceGroups, 1, 1); + setupStateModel(clusterName); + setupInstances(clusterName, new int[]{0}); List liveInstances = setupLiveInstances(clusterName, new int[] { 0 }); - setupStateModel(clusterName); // cluster data cache refresh pipeline Pipeline dataRefresh = new Pipeline(); @@ -321,10 +324,11 @@ public void testChangeIdealStateWithPendingMsg() throws Exception { setupIdealState(clusterName, new int[] { 0 }, resourceGroups, 1, 1); + setupStateModel(clusterName); + setupInstances(clusterName, new int[]{0}); List liveInstances = setupLiveInstances(clusterName, new int[] { 0 }); - setupStateModel(clusterName); // cluster data cache refresh pipeline Pipeline dataRefresh = new Pipeline(); @@ -395,8 +399,10 @@ public void 
testChangeIdealStateWithPendingMsg() throws Exception { @Test public void testMasterXfer() throws Exception { String clusterName = "CLUSTER_" + _className + "_xfer"; + HelixAdmin admin = new ZKHelixAdmin(_gZkClient); System.out.println("START " + clusterName + " at " + new Date(System.currentTimeMillis())); + admin.addCluster(clusterName); HelixDataAccessor accessor = new ZKHelixDataAccessor(clusterName, new ZkBaseDataAccessor<>(_gZkClient)); @@ -417,10 +423,11 @@ public void testMasterXfer() throws Exception { setupIdealState(clusterName, new int[] { 0, 1 }, resourceGroups, 1, 2); + setupStateModel(clusterName); + setupInstances(clusterName, new int[]{0, 1}); List liveInstances = setupLiveInstances(clusterName, new int[] { 1 }); - setupStateModel(clusterName); // cluster data cache refresh pipeline Pipeline dataRefresh = new Pipeline(); @@ -474,8 +481,10 @@ public void testMasterXfer() throws Exception { @Test public void testNoDuplicatedMaster() throws Exception { String clusterName = "CLUSTER_" + _className + "_no_duplicated_master"; + HelixAdmin admin = new ZKHelixAdmin(_gZkClient); System.out.println("START " + clusterName + " at " + new Date(System.currentTimeMillis())); + admin.addCluster(clusterName); HelixDataAccessor accessor = new ZKHelixDataAccessor(clusterName, new ZkBaseDataAccessor<>(_gZkClient)); @@ -496,10 +505,11 @@ public void testNoDuplicatedMaster() throws Exception { setupIdealState(clusterName, new int[] { 0, 1 }, resourceGroups, 1, 2); + setupStateModel(clusterName); + setupInstances(clusterName, new int[]{0, 1}); List liveInstances = setupLiveInstances(clusterName, new int[] { 0, 1 }); - setupStateModel(clusterName); // cluster data cache refresh pipeline Pipeline dataRefresh = new Pipeline(); @@ -553,6 +563,9 @@ public void testNoDuplicatedMaster() throws Exception { public void testNoMessageSentOnControllerLeadershipLoss() throws Exception { String methodName = TestHelper.getTestMethodName(); String clusterName = _className + "_" + 
methodName; + HelixAdmin admin = new ZKHelixAdmin(_gZkClient); + + admin.addCluster(clusterName); final String resourceName = "testResource_" + methodName; final String partitionName = resourceName + "_0"; @@ -565,10 +578,11 @@ public void testNoMessageSentOnControllerLeadershipLoss() throws Exception { setupIdealState(clusterName, new int[] { 0 }, resourceGroups, 1, 1); + setupStateModel(clusterName); + setupInstances(clusterName, new int[]{0}); List liveInstances = setupLiveInstances(clusterName, new int[] { 0 }); - setupStateModel(clusterName); HelixDataAccessor accessor = new ZKHelixDataAccessor(clusterName, new ZkBaseDataAccessor<>(_gZkClient)); diff --git a/helix-core/src/test/java/org/apache/helix/controller/stages/TestReplicaLevelThrottling.java b/helix-core/src/test/java/org/apache/helix/controller/stages/TestReplicaLevelThrottling.java index af47542ee3..e4aeed04f8 100644 --- a/helix-core/src/test/java/org/apache/helix/controller/stages/TestReplicaLevelThrottling.java +++ b/helix-core/src/test/java/org/apache/helix/controller/stages/TestReplicaLevelThrottling.java @@ -71,7 +71,7 @@ private void prepareCache(Map cacheMap, Mock mock) { when(mock.cache.getClusterConfig()).thenReturn((ClusterConfig) cacheMap.get(CacheKeys.clusterConfig.name())); when(mock.cache.getStateModelDef((String) cacheMap.get(CacheKeys.stateModelName.name()))).thenReturn( (StateModelDefinition) cacheMap.get(CacheKeys.stateModelDef.name())); - when(mock.cache.getEnabledLiveInstances()).thenReturn(new HashSet<>( + when(mock.cache.getAssignableEnabledLiveInstances()).thenReturn(new HashSet<>( ((Map>) cacheMap.get(CacheKeys.preferenceList.name())).values().iterator().next())); when(mock.cache.getLiveInstances()).thenReturn(new HashSet<>( ((Map>) cacheMap.get(CacheKeys.preferenceList.name())).values().iterator().next()).stream() diff --git a/helix-core/src/test/java/org/apache/helix/integration/controller/TestOfflineNodeTimeoutDuringMaintenanceMode.java 
b/helix-core/src/test/java/org/apache/helix/integration/controller/TestOfflineNodeTimeoutDuringMaintenanceMode.java index 742020b20f..d39dd1fd2d 100644 --- a/helix-core/src/test/java/org/apache/helix/integration/controller/TestOfflineNodeTimeoutDuringMaintenanceMode.java +++ b/helix-core/src/test/java/org/apache/helix/integration/controller/TestOfflineNodeTimeoutDuringMaintenanceMode.java @@ -208,15 +208,15 @@ public void testOfflineNodeTimeoutDuringMaintenanceModeTimestampsMock() new ResourceControllerDataProvider(CLUSTER_NAME); resourceControllerDataProvider.refresh(_helixDataAccessor); Assert - .assertFalse(resourceControllerDataProvider.getLiveInstances().containsKey(instance3)); + .assertFalse(resourceControllerDataProvider.getAssignableLiveInstances().containsKey(instance3)); Assert - .assertFalse(resourceControllerDataProvider.getLiveInstances().containsKey(instance4)); - Assert.assertTrue(resourceControllerDataProvider.getLiveInstances().containsKey(instance5)); + .assertFalse(resourceControllerDataProvider.getAssignableLiveInstances().containsKey(instance4)); + Assert.assertTrue(resourceControllerDataProvider.getAssignableLiveInstances().containsKey(instance5)); Assert - .assertFalse(resourceControllerDataProvider.getLiveInstances().containsKey(instance6)); + .assertFalse(resourceControllerDataProvider.getAssignableLiveInstances().containsKey(instance6)); Assert - .assertFalse(resourceControllerDataProvider.getLiveInstances().containsKey(instance7)); - Assert.assertTrue(resourceControllerDataProvider.getLiveInstances().containsKey(instance8)); + .assertFalse(resourceControllerDataProvider.getAssignableLiveInstances().containsKey(instance7)); + Assert.assertTrue(resourceControllerDataProvider.getAssignableLiveInstances().containsKey(instance8)); } /** diff --git a/helix-core/src/test/java/org/apache/helix/integration/messaging/TestP2PMessageSemiAuto.java b/helix-core/src/test/java/org/apache/helix/integration/messaging/TestP2PMessageSemiAuto.java index 
a324a33000..f01af8a7f8 100644 --- a/helix-core/src/test/java/org/apache/helix/integration/messaging/TestP2PMessageSemiAuto.java +++ b/helix-core/src/test/java/org/apache/helix/integration/messaging/TestP2PMessageSemiAuto.java @@ -208,7 +208,7 @@ private void verifyP2PMessage(String dbName, String instance, String expectedSta ResourceControllerDataProvider dataCache = new ResourceControllerDataProvider(CLUSTER_NAME); dataCache.refresh(_accessor); - Map liveInstanceMap = dataCache.getLiveInstances(); + Map liveInstanceMap = dataCache.getAssignableLiveInstances(); LiveInstance liveInstance = liveInstanceMap.get(instance); Map currentStateMap = diff --git a/helix-core/src/test/java/org/apache/helix/integration/messaging/TestP2PNoDuplicatedMessage.java b/helix-core/src/test/java/org/apache/helix/integration/messaging/TestP2PNoDuplicatedMessage.java index 67d02a421b..33cdbd378f 100644 --- a/helix-core/src/test/java/org/apache/helix/integration/messaging/TestP2PNoDuplicatedMessage.java +++ b/helix-core/src/test/java/org/apache/helix/integration/messaging/TestP2PNoDuplicatedMessage.java @@ -192,7 +192,7 @@ private void verifyP2P(long startTime, String instance, boolean enabled) throws private void verifyP2PDisabled() { ResourceControllerDataProvider dataCache = new ResourceControllerDataProvider(CLUSTER_NAME); dataCache.refresh(_accessor); - Map liveInstanceMap = dataCache.getLiveInstances(); + Map liveInstanceMap = dataCache.getAssignableLiveInstances(); for (LiveInstance instance : liveInstanceMap.values()) { Map currentStateMap = @@ -218,7 +218,7 @@ private void verifyP2PDisabled() { private void verifyP2PEnabled(long startTime) { ResourceControllerDataProvider dataCache = new ResourceControllerDataProvider(CLUSTER_NAME); dataCache.refresh(_accessor); - Map liveInstanceMap = dataCache.getLiveInstances(); + Map liveInstanceMap = dataCache.getAssignableLiveInstances(); for (LiveInstance instance : liveInstanceMap.values()) { Map currentStateMap = diff --git 
a/helix-core/src/test/java/org/apache/helix/integration/rebalancer/TestAutoRebalance.java b/helix-core/src/test/java/org/apache/helix/integration/rebalancer/TestAutoRebalance.java index f668d16038..3c389ac3a8 100644 --- a/helix-core/src/test/java/org/apache/helix/integration/rebalancer/TestAutoRebalance.java +++ b/helix-core/src/test/java/org/apache/helix/integration/rebalancer/TestAutoRebalance.java @@ -293,13 +293,13 @@ public boolean verify() { int replicas = Integer.parseInt(cache.getIdealState(_resourceName).getReplicas()); String instanceGroupTag = cache.getIdealState(_resourceName).getInstanceGroupTag(); int instances = 0; - for (String liveInstanceName : cache.getLiveInstances().keySet()) { - if (cache.getInstanceConfigMap().get(liveInstanceName).containsTag(instanceGroupTag)) { + for (String liveInstanceName : cache.getAssignableLiveInstances().keySet()) { + if (cache.getAssignableInstanceConfigMap().get(liveInstanceName).containsTag(instanceGroupTag)) { instances++; } } if (instances == 0) { - instances = cache.getLiveInstances().size(); + instances = cache.getAssignableLiveInstances().size(); } ExternalView ev = accessor.getProperty(keyBuilder.externalView(_resourceName)); if (ev == null) { diff --git a/helix-core/src/test/java/org/apache/helix/integration/rebalancer/TestAutoRebalancePartitionLimit.java b/helix-core/src/test/java/org/apache/helix/integration/rebalancer/TestAutoRebalancePartitionLimit.java index 0fb203e316..49800328d0 100644 --- a/helix-core/src/test/java/org/apache/helix/integration/rebalancer/TestAutoRebalancePartitionLimit.java +++ b/helix-core/src/test/java/org/apache/helix/integration/rebalancer/TestAutoRebalancePartitionLimit.java @@ -222,7 +222,7 @@ public boolean verify() { try { return verifyBalanceExternalView( accessor.getProperty(keyBuilder.externalView(_resourceName)).getRecord(), - numberOfPartitions, masterValue, replicas, cache.getLiveInstances().size(), + numberOfPartitions, masterValue, replicas, 
cache.getAssignableLiveInstances().size(), cache.getIdealState(_resourceName).getMaxPartitionsPerInstance()); } catch (Exception e) { LOG.debug("Verify failed", e); diff --git a/helix-core/src/test/java/org/apache/helix/integration/rebalancer/TestCustomRebalancer.java b/helix-core/src/test/java/org/apache/helix/integration/rebalancer/TestCustomRebalancer.java index 1bf67ccae1..07a0a5de56 100644 --- a/helix-core/src/test/java/org/apache/helix/integration/rebalancer/TestCustomRebalancer.java +++ b/helix-core/src/test/java/org/apache/helix/integration/rebalancer/TestCustomRebalancer.java @@ -68,7 +68,7 @@ public void testDisabledBootstrappingPartitions() { when(cache.getStateModelDef(stateModelName)).thenReturn(stateModelDef); when(cache.getDisabledInstancesForPartition(resource.getResourceName(), partitionName)) .thenReturn(ImmutableSet.of(instanceName)); - when(cache.getLiveInstances()) + when(cache.getAssignableLiveInstances()) .thenReturn(ImmutableMap.of(instanceName, new LiveInstance(instanceName))); CurrentStateOutput currOutput = new CurrentStateOutput(); diff --git a/helix-core/src/test/java/org/apache/helix/integration/rebalancer/TestCustomizedIdealStateRebalancer.java b/helix-core/src/test/java/org/apache/helix/integration/rebalancer/TestCustomizedIdealStateRebalancer.java index f7fa7ddd1c..4f52d2ecc0 100644 --- a/helix-core/src/test/java/org/apache/helix/integration/rebalancer/TestCustomizedIdealStateRebalancer.java +++ b/helix-core/src/test/java/org/apache/helix/integration/rebalancer/TestCustomizedIdealStateRebalancer.java @@ -62,7 +62,7 @@ public void init(HelixManager manager) { public IdealState computeNewIdealState(String resourceName, IdealState currentIdealState, CurrentStateOutput currentStateOutput, ResourceControllerDataProvider clusterData) { testRebalancerInvoked = true; - List liveNodes = Lists.newArrayList(clusterData.getLiveInstances().keySet()); + List liveNodes = Lists.newArrayList(clusterData.getAssignableLiveInstances().keySet()); int i 
= 0; for (String partition : currentIdealState.getPartitionSet()) { int index = i++ % liveNodes.size(); @@ -139,13 +139,13 @@ public boolean verify() { int replicas = Integer.parseInt(cache.getIdealState(_resourceName).getReplicas()); String instanceGroupTag = cache.getIdealState(_resourceName).getInstanceGroupTag(); int instances = 0; - for (String liveInstanceName : cache.getLiveInstances().keySet()) { - if (cache.getInstanceConfigMap().get(liveInstanceName).containsTag(instanceGroupTag)) { + for (String liveInstanceName : cache.getAssignableLiveInstances().keySet()) { + if (cache.getAssignableInstanceConfigMap().get(liveInstanceName).containsTag(instanceGroupTag)) { instances++; } } if (instances == 0) { - instances = cache.getLiveInstances().size(); + instances = cache.getAssignableLiveInstances().size(); } return verifyBalanceExternalView( accessor.getProperty(keyBuilder.externalView(_resourceName)).getRecord(), diff --git a/helix-core/src/test/java/org/apache/helix/integration/rebalancer/TestInstanceOperation.java b/helix-core/src/test/java/org/apache/helix/integration/rebalancer/TestInstanceOperation.java index 9ccc14fdfa..b7c90d8412 100644 --- a/helix-core/src/test/java/org/apache/helix/integration/rebalancer/TestInstanceOperation.java +++ b/helix-core/src/test/java/org/apache/helix/integration/rebalancer/TestInstanceOperation.java @@ -41,6 +41,7 @@ import org.apache.helix.participant.statemachine.StateModelFactory; import org.apache.helix.participant.statemachine.StateModelInfo; import org.apache.helix.participant.statemachine.Transition; +import org.apache.helix.tools.ClusterVerifiers.BestPossibleExternalViewVerifier; import org.apache.helix.tools.ClusterVerifiers.StrictMatchExternalViewVerifier; import org.apache.helix.tools.ClusterVerifiers.ZkHelixClusterVerifier; import org.testng.Assert; @@ -61,7 +62,8 @@ public class TestInstanceOperation extends ZkTestBase { protected static final String HOST = "host"; protected static final String LOGICAL_ID = 
"logicalId"; protected static final String TOPOLOGY = String.format("%s/%s/%s", ZONE, HOST, LOGICAL_ID); - + protected static final ImmutableSet TOP_STATE_SET = + ImmutableSet.of("MASTER"); protected static final ImmutableSet SECONDARY_STATE_SET = ImmutableSet.of("SLAVE", "STANDBY"); protected static final ImmutableSet ACCEPTABLE_STATE_SET = @@ -73,6 +75,7 @@ public class TestInstanceOperation extends ZkTestBase { List _participantNames = new ArrayList<>(); private Set _allDBs = new HashSet<>(); private ZkHelixClusterVerifier _clusterVerifier; + private ZkHelixClusterVerifier _bestPossibleClusterVerifier; private ConfigAccessor _configAccessor; private long _stateModelDelay = 3L; @@ -102,6 +105,10 @@ public void beforeClass() throws Exception { .setResources(_allDBs) .setWaitTillVerify(TestHelper.DEFAULT_REBALANCE_PROCESSING_WAIT_TIME) .build(); + _bestPossibleClusterVerifier = new BestPossibleExternalViewVerifier.Builder(CLUSTER_NAME).setZkAddr(ZK_ADDR) + .setResources(_allDBs) + .setWaitTillVerify(TestHelper.DEFAULT_REBALANCE_PROCESSING_WAIT_TIME) + .build(); enablePersistBestPossibleAssignment(_gZkClient, CLUSTER_NAME, true); _configAccessor = new ConfigAccessor(_gZkClient); _dataAccessor = new ZKHelixDataAccessor(CLUSTER_NAME, _baseAccessor); @@ -122,6 +129,7 @@ private void setupClusterConfig() { clusterConfig.setDelayRebalaceEnabled(true); clusterConfig.setRebalanceDelayTime(1800000L); _configAccessor.setClusterConfig(CLUSTER_NAME, clusterConfig); + enabledTopologyAwareRebalance(); Assert.assertTrue(_clusterVerifier.verifyByPolling()); } @@ -152,8 +160,8 @@ private void resetInstances() { for (int i = 0; i < _participants.size(); i++) { String participantName = _participantNames.get(i); if (!_originalParticipantNames.contains(participantName)) { - _participants.get(i).syncStop(); _gSetupTool.getClusterManagementTool().enableInstance(CLUSTER_NAME, participantName, false); + _participants.get(i).syncStop(); _gSetupTool.getClusterManagementTool() 
.dropInstance(CLUSTER_NAME, _gSetupTool.getClusterManagementTool().getInstanceConfig(CLUSTER_NAME, participantName)); droppedParticipants.add(participantName); @@ -390,6 +398,7 @@ public void testMarkEvacuationAfterEMM() throws Exception { null); addParticipant(PARTICIPANT_PREFIX + "_" + (START_PORT + NUM_NODE)); + Assert.assertTrue(_clusterVerifier.verifyByPolling()); Map assignment = getEVs(); for (String resource : _allDBs) { @@ -680,7 +689,7 @@ public void testNodeSwap() throws Exception { // and adding the SWAP_IN instance to the cluster. // Check that the SWAP_IN instance has the same partitions as the SWAP_OUT instance // but none of them are in a top state. - Assert.assertTrue(_clusterVerifier.verifyByPolling()); + Assert.assertTrue(_bestPossibleClusterVerifier.verifyByPolling()); validateEVsCorrect(getEVs(), originalEVs, swapOutInstancesToSwapInInstances, Set.of(instanceToSwapInName), Collections.emptySet()); @@ -747,7 +756,7 @@ public void testNodeSwapSwapInNodeNoInstanceOperationDisabled() throws Exception // and adding the SWAP_IN instance to the cluster. // Check that the SWAP_IN instance has the same partitions as the SWAP_OUT instance // but none of them are in a top state. 
- Assert.assertTrue(_clusterVerifier.verifyByPolling()); + Assert.assertTrue(_bestPossibleClusterVerifier.verifyByPolling()); validateEVsCorrect(getEVs(), originalEVs, swapOutInstancesToSwapInInstances, Set.of(instanceToSwapInName), Collections.emptySet()); @@ -793,7 +802,7 @@ public void testNodeSwapCancelSwapWhenReadyToComplete() throws Exception { InstanceConstants.InstanceOperation.SWAP_OUT); // Validate that the assignment has not changed since setting the InstanceOperation to SWAP_OUT - Assert.assertTrue(_clusterVerifier.verifyByPolling()); + Assert.assertTrue(_bestPossibleClusterVerifier.verifyByPolling()); validateEVsCorrect(getEVs(), originalEVs, swapOutInstancesToSwapInInstances, Collections.emptySet(), Collections.emptySet()); @@ -808,7 +817,7 @@ public void testNodeSwapCancelSwapWhenReadyToComplete() throws Exception { // and adding the SWAP_IN instance to the cluster. // Check that the SWAP_IN instance has the same partitions as the SWAP_OUT instance // but none of them are in a top state. - Assert.assertTrue(_clusterVerifier.verifyByPolling()); + Assert.assertTrue(_bestPossibleClusterVerifier.verifyByPolling()); validateEVsCorrect(getEVs(), originalEVs, swapOutInstancesToSwapInInstances, Set.of(instanceToSwapInName), Collections.emptySet()); @@ -880,7 +889,7 @@ public void testNodeSwapAfterEMM() throws Exception { // Validate that the assignment has not changed since adding the SWAP_IN node. // During MM, the cluster should not compute new assignment. 
- Assert.assertTrue(_clusterVerifier.verifyByPolling()); + Assert.assertTrue(_bestPossibleClusterVerifier.verifyByPolling()); validateEVsCorrect(getEVs(), originalEVs, swapOutInstancesToSwapInInstances, Collections.emptySet(), Collections.emptySet()); @@ -892,7 +901,7 @@ public void testNodeSwapAfterEMM() throws Exception { // Validate that partitions on SWAP_OUT instance does not change after exiting MM // Check that the SWAP_IN instance has the same partitions as the SWAP_OUT instance // but none of them are in a top state. - Assert.assertTrue(_clusterVerifier.verifyByPolling()); + Assert.assertTrue(_bestPossibleClusterVerifier.verifyByPolling()); validateEVsCorrect(getEVs(), originalEVs, swapOutInstancesToSwapInInstances, Set.of(instanceToSwapInName), Collections.emptySet()); @@ -956,16 +965,12 @@ public void testNodeSwapWithSwapOutInstanceDisabled() throws Exception { instanceToSwapOutInstanceConfig.getDomainAsMap().get(ZONE), InstanceConstants.InstanceOperation.SWAP_IN, true, -1); - Assert.assertTrue(_clusterVerifier.verifyByPolling()); + Assert.assertTrue(_bestPossibleClusterVerifier.verifyByPolling()); // Validate that the SWAP_IN instance has the same partitions as the SWAP_OUT instance in second top state. Map swapInInstancePartitionsAndStates = getPartitionsAndStatesOnInstance(getEVs(), instanceToSwapInName); - Assert.assertTrue( - swapInInstancePartitionsAndStates.keySet().containsAll(swapOutInstanceOriginalPartitions)); - Set swapInInstanceStates = new HashSet<>(swapInInstancePartitionsAndStates.values()); - swapInInstanceStates.removeAll(SECONDARY_STATE_SET); - Assert.assertEquals(swapInInstanceStates.size(), 0); + Assert.assertEquals(swapInInstancePartitionsAndStates.keySet().size(), 0); // Assert canSwapBeCompleted is false because SWAP_OUT instance is disabled. 
Assert.assertFalse(_gSetupTool.getClusterManagementTool() @@ -975,23 +980,22 @@ public void testNodeSwapWithSwapOutInstanceDisabled() throws Exception { _gSetupTool.getClusterManagementTool() .enableInstance(CLUSTER_NAME, instanceToSwapOutName, true); - Assert.assertTrue(_clusterVerifier.verifyByPolling()); - - // Assert completeSwapIfPossible is true - Assert.assertTrue(_gSetupTool.getClusterManagementTool() - .completeSwapIfPossible(CLUSTER_NAME, instanceToSwapOutName)); + Assert.assertTrue(_bestPossibleClusterVerifier.verifyByPolling()); // Validate that the SWAP_IN instance has the same partitions the SWAP_OUT instance originally - // had. Validate they are in second top state because initially disabling SWAP_OUT instance - // caused all topStates to be handed off to next replica in the preference list. + // had. Validate they are in second top state. swapInInstancePartitionsAndStates = getPartitionsAndStatesOnInstance(getEVs(), instanceToSwapInName); Assert.assertTrue( swapInInstancePartitionsAndStates.keySet().containsAll(swapOutInstanceOriginalPartitions)); - swapInInstanceStates = new HashSet<>(swapInInstancePartitionsAndStates.values()); + Set swapInInstanceStates = new HashSet<>(swapInInstancePartitionsAndStates.values()); swapInInstanceStates.removeAll(SECONDARY_STATE_SET); Assert.assertEquals(swapInInstanceStates.size(), 0); + // Assert completeSwapIfPossible is true + Assert.assertTrue(_gSetupTool.getClusterManagementTool() + .completeSwapIfPossible(CLUSTER_NAME, instanceToSwapOutName)); + Assert.assertTrue(_clusterVerifier.verifyByPolling()); // Assert that SWAP_OUT instance is disabled and has no partitions assigned to it. 
@@ -1036,7 +1040,7 @@ public void testNodeSwapAddSwapInFirstEnableBeforeSwapOutSet() { addParticipant(instanceToSwapInName, instanceToSwapOutInstanceConfig.getLogicalId(LOGICAL_ID), instanceToSwapOutInstanceConfig.getDomainAsMap().get(ZONE), null, false, -1); - Assert.assertTrue(_clusterVerifier.verifyByPolling()); + Assert.assertTrue(_bestPossibleClusterVerifier.verifyByPolling()); // Enable the SWAP_IN instance before we have set the SWAP_OUT instance. _gSetupTool.getClusterManagementTool().enableInstance(CLUSTER_NAME, instanceToSwapInName, true); @@ -1059,7 +1063,7 @@ public void testUnsetInstanceOperationOnSwapInWhenAlreadyUnsetOnSwapOut() { addParticipant(instanceToSwapInName, instanceToSwapOutInstanceConfig.getLogicalId(LOGICAL_ID), instanceToSwapOutInstanceConfig.getDomainAsMap().get(ZONE), null, false, -1); - Assert.assertTrue(_clusterVerifier.verifyByPolling()); + Assert.assertTrue(_bestPossibleClusterVerifier.verifyByPolling()); // Try to remove the InstanceOperation from the SWAP_IN instance before the SWAP_OUT instance is set. // This should throw exception because we cannot ever have two instances with the same logicalId and both have InstanceOperation @@ -1108,7 +1112,7 @@ public void testNodeSwapAddSwapInFirst() { // and adding the SWAP_IN instance to the cluster. // Check that the SWAP_IN instance has the same partitions as the SWAP_OUT instance // but none of them are in a top state. 
- Assert.assertTrue(_clusterVerifier.verifyByPolling()); + Assert.assertTrue(_bestPossibleClusterVerifier.verifyByPolling()); validateEVsCorrect(getEVs(), originalEVs, swapOutInstancesToSwapInInstances, Set.of(instanceToSwapInName), Collections.emptySet()); diff --git a/helix-core/src/test/java/org/apache/helix/integration/rebalancer/WagedRebalancer/TestWagedNodeSwap.java b/helix-core/src/test/java/org/apache/helix/integration/rebalancer/WagedRebalancer/TestWagedNodeSwap.java index 588de917a0..f6ef8279dc 100644 --- a/helix-core/src/test/java/org/apache/helix/integration/rebalancer/WagedRebalancer/TestWagedNodeSwap.java +++ b/helix-core/src/test/java/org/apache/helix/integration/rebalancer/WagedRebalancer/TestWagedNodeSwap.java @@ -174,7 +174,7 @@ public void testNodeSwap() throws Exception { _gSetupTool.addInstanceToCluster(CLUSTER_NAME, newParticipantName); InstanceConfig newConfig = configAccessor.getInstanceConfig(CLUSTER_NAME, newParticipantName); String zone = instanceConfig.getDomainAsMap().get("zone"); - String domain = String.format("zone=%s,instance=%s", zone, newParticipantName); + String domain = String.format("zone=%s,instance=%s", zone, oldParticipantName); newConfig.setDomain(domain); _gSetupTool.getClusterManagementTool() .setInstanceConfig(CLUSTER_NAME, newParticipantName, newConfig); diff --git a/helix-core/src/test/java/org/apache/helix/messaging/p2pMessage/TestP2PMessages.java b/helix-core/src/test/java/org/apache/helix/messaging/p2pMessage/TestP2PMessages.java index 649ff7ec13..1002ee7dd9 100644 --- a/helix-core/src/test/java/org/apache/helix/messaging/p2pMessage/TestP2PMessages.java +++ b/helix-core/src/test/java/org/apache/helix/messaging/p2pMessage/TestP2PMessages.java @@ -112,8 +112,8 @@ public void beforeClass() { e.printStackTrace(); } - _instances = _dataCache.getAllInstances(); - _liveInstanceMap = _dataCache.getLiveInstances(); + _instances = _dataCache.getAssignableInstances(); + _liveInstanceMap = 
_dataCache.getAssignableLiveInstances(); _initialStateMap = event.getAttribute(AttributeName.BEST_POSSIBLE_STATE.name()); _initialMaster = getTopStateInstance(_initialStateMap.getInstanceStateMap(_db, _partition), @@ -218,7 +218,7 @@ public void testP2PWithErrorState() throws Exception { // Old master (initialMaster) failed the M->S transition, // but has not forward p2p message to new master (secondMaster) yet. // Validate: Controller should ignore the ERROR partition and send S->M message to new master. - String session = _dataCache.getLiveInstances().get(_initialMaster).getEphemeralOwner(); + String session = _dataCache.getAssignableLiveInstances().get(_initialMaster).getEphemeralOwner(); PropertyKey currentStateKey = new PropertyKey.Builder(_clusterName).currentState(_initialMaster, session, _db); CurrentState currentState = accessor.getProperty(currentStateKey); @@ -308,7 +308,7 @@ public void testP2PWithInstanceOffline() throws Exception { private void handleMessage(String instance, String resource) { PropertyKey propertyKey = new PropertyKey.Builder(_clusterName).messages(instance); List messages = accessor.getChildValues(propertyKey, true); - String session = _dataCache.getLiveInstances().get(instance).getEphemeralOwner(); + String session = _dataCache.getAssignableLiveInstances().get(instance).getEphemeralOwner(); for (Message m : messages) { if (m.getResourceName().equals(resource)) { diff --git a/helix-core/src/test/java/org/apache/helix/task/TestTargetedTaskStateChange.java b/helix-core/src/test/java/org/apache/helix/task/TestTargetedTaskStateChange.java index e849be28c4..d8810b153d 100644 --- a/helix-core/src/test/java/org/apache/helix/task/TestTargetedTaskStateChange.java +++ b/helix-core/src/test/java/org/apache/helix/task/TestTargetedTaskStateChange.java @@ -86,8 +86,8 @@ public void testTwoRunningCurrentStates() { when(mock._cache.getTaskDataCache()).thenReturn(mock._taskDataCache); 
when(mock._cache.getJobContext(JOB_NAME)).thenReturn(mock._jobContext); when(mock._cache.getIdealStates()).thenReturn(mock._idealStates); - when(mock._cache.getEnabledLiveInstances()).thenReturn(_liveInstances.keySet()); - when(mock._cache.getInstanceConfigMap()).thenReturn(_instanceConfigs); + when(mock._cache.getAssignableEnabledLiveInstances()).thenReturn(_liveInstances.keySet()); + when(mock._cache.getAssignableInstanceConfigMap()).thenReturn(_instanceConfigs); when(mock._cache.getClusterConfig()).thenReturn(_clusterConfig); when(mock._taskDataCache.getRuntimeJobDag(WORKFLOW_NAME)).thenReturn(mock._runtimeJobDag); _assignableInstanceManager.buildAssignableInstances(_clusterConfig, mock._taskDataCache, @@ -123,8 +123,8 @@ public void testOneRunningOneNull() { when(mock._cache.getTaskDataCache()).thenReturn(mock._taskDataCache); when(mock._cache.getJobContext(JOB_NAME)).thenReturn(mock._jobContext); when(mock._cache.getIdealStates()).thenReturn(mock._idealStates); - when(mock._cache.getEnabledLiveInstances()).thenReturn(_liveInstances.keySet()); - when(mock._cache.getInstanceConfigMap()).thenReturn(_instanceConfigs); + when(mock._cache.getAssignableEnabledLiveInstances()).thenReturn(_liveInstances.keySet()); + when(mock._cache.getAssignableInstanceConfigMap()).thenReturn(_instanceConfigs); when(mock._cache.getClusterConfig()).thenReturn(_clusterConfig); when(mock._taskDataCache.getRuntimeJobDag(WORKFLOW_NAME)).thenReturn(mock._runtimeJobDag); _assignableInstanceManager.buildAssignableInstances(_clusterConfig, mock._taskDataCache, diff --git a/helix-rest/src/test/java/org/apache/helix/rest/server/AbstractTestClass.java b/helix-rest/src/test/java/org/apache/helix/rest/server/AbstractTestClass.java index 6b357a384e..d0f0c57151 100644 --- a/helix-rest/src/test/java/org/apache/helix/rest/server/AbstractTestClass.java +++ b/helix-rest/src/test/java/org/apache/helix/rest/server/AbstractTestClass.java @@ -322,7 +322,7 @@ private void setupZooKeepers() { } protected void 
setupHelixResources() throws Exception { - _clusters = createClusters(4); + _clusters = createClusters(5); _gSetupTool.addCluster(_superCluster, true); _gSetupTool.addCluster(TASK_TEST_CLUSTER, true); _clusters.add(_superCluster); diff --git a/helix-rest/src/test/java/org/apache/helix/rest/server/TestInstancesAccessor.java b/helix-rest/src/test/java/org/apache/helix/rest/server/TestInstancesAccessor.java index 92dfff0024..5cfab76a06 100644 --- a/helix-rest/src/test/java/org/apache/helix/rest/server/TestInstancesAccessor.java +++ b/helix-rest/src/test/java/org/apache/helix/rest/server/TestInstancesAccessor.java @@ -45,7 +45,7 @@ import org.testng.annotations.Test; public class TestInstancesAccessor extends AbstractTestClass { - private final static String CLUSTER_NAME = "TestCluster_0"; + private final static String CLUSTER_NAME = "TestCluster_4"; @DataProvider public Object[][] generatePayloadCrossZoneStoppableCheckWithZoneOrder() { From 70b05d57f80db3a7ac4115fff7ca83ae8c0f0aa1 Mon Sep 17 00:00:00 2001 From: Zachary Pinto Date: Wed, 6 Dec 2023 17:43:43 -0800 Subject: [PATCH 08/11] Make logic to determine state of replicas on SWAP_IN instance simpler and more predictable during an in-flight node swap. 
(#2706) --- .../controller/rebalancer/AbstractRebalancer.java | 4 ++-- .../controller/stages/BestPossibleStateCalcStage.java | 10 +++++++--- .../helix/controller/stages/MessageSelectionStage.java | 4 ++-- .../java/org/apache/helix/examples/Quickstart.java | 2 +- .../java/org/apache/helix/manager/zk/ZKHelixAdmin.java | 5 +++-- .../java/org/apache/helix/model/LeaderStandbySMD.java | 5 +++-- .../java/org/apache/helix/model/MasterSlaveSMD.java | 5 +++-- .../java/org/apache/helix/model/OnlineOfflineSMD.java | 5 +++-- .../helix/model/OnlineOfflineWithBootstrapSMD.java | 3 ++- .../org/apache/helix/model/StateModelDefinition.java | 10 ++++++---- .../org/apache/helix/model/StorageSchemataSMD.java | 5 +++-- .../model/util/StateModelDefinitionValidator.java | 3 ++- .../main/java/org/apache/helix/util/RebalanceUtil.java | 5 +++-- .../src/test/java/org/apache/helix/TestHelper.java | 2 +- .../TestPartitionLevelTransitionConstraint.java | 2 +- .../helix/integration/TestPreferenceListAsQueue.java | 3 ++- .../integration/messaging/TestMessageThrottle2.java | 2 +- .../org/apache/helix/model/TestStateModelValidity.java | 2 +- 18 files changed, 46 insertions(+), 31 deletions(-) diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/AbstractRebalancer.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/AbstractRebalancer.java index 7a23b8f280..51158cb911 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/AbstractRebalancer.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/AbstractRebalancer.java @@ -329,9 +329,9 @@ public static int getStateCount(String state, StateModelDefinition stateModelDef int preferenceListSize) { String num = stateModelDef.getNumInstancesPerState(state); int stateCount = -1; - if ("N".equals(num)) { + if (StateModelDefinition.STATE_REPLICA_COUNT_ALL_CANDIDATE_NODES.equals(num)) { stateCount = liveAndEnabledSize; - } else if ("R".equals(num)) { + } else if 
(StateModelDefinition.STATE_REPLICA_COUNT_ALL_REPLICAS.equals(num)) { stateCount = preferenceListSize; } else { try { diff --git a/helix-core/src/main/java/org/apache/helix/controller/stages/BestPossibleStateCalcStage.java b/helix-core/src/main/java/org/apache/helix/controller/stages/BestPossibleStateCalcStage.java index 8ec4b44757..05652e222d 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/stages/BestPossibleStateCalcStage.java +++ b/helix-core/src/main/java/org/apache/helix/controller/stages/BestPossibleStateCalcStage.java @@ -37,7 +37,6 @@ import org.apache.helix.controller.dataproviders.ResourceControllerDataProvider; import org.apache.helix.controller.pipeline.AbstractBaseStage; import org.apache.helix.controller.pipeline.StageException; -import org.apache.helix.controller.rebalancer.AbstractRebalancer; import org.apache.helix.controller.rebalancer.CustomRebalancer; import org.apache.helix.controller.rebalancer.DelayedAutoRebalancer; import org.apache.helix.controller.rebalancer.MaintenanceRebalancer; @@ -150,8 +149,13 @@ private void addSwapInInstancesToBestPossibleState(Map resourc commonInstances.forEach(swapOutInstance -> { if (stateMap.get(swapOutInstance).equals(stateModelDef.getTopState())) { - if (AbstractRebalancer.getStateCount(stateModelDef.getTopState(), stateModelDef, - stateMap.size() + 1, stateMap.size() + 1) > stateMap.size()) { + + String topStateCount = + stateModelDef.getNumInstancesPerState(stateModelDef.getTopState()); + if (topStateCount.equals( + StateModelDefinition.STATE_REPLICA_COUNT_ALL_CANDIDATE_NODES) + || topStateCount.equals( + StateModelDefinition.STATE_REPLICA_COUNT_ALL_REPLICAS)) { // If the swap-out instance's replica is a topState and the StateModel allows for // another replica with the topState to be added, set the swap-in instance's replica // to the topState. 
diff --git a/helix-core/src/main/java/org/apache/helix/controller/stages/MessageSelectionStage.java b/helix-core/src/main/java/org/apache/helix/controller/stages/MessageSelectionStage.java index 09894263f0..2751f1b26a 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/stages/MessageSelectionStage.java +++ b/helix-core/src/main/java/org/apache/helix/controller/stages/MessageSelectionStage.java @@ -262,9 +262,9 @@ private Map computeStateConstraints(StateModelDefinition stateMo for (String state : statePriorityList) { String numInstancesPerState = stateModelDefinition.getNumInstancesPerState(state); int max = -1; - if ("N".equals(numInstancesPerState)) { + if (StateModelDefinition.STATE_REPLICA_COUNT_ALL_CANDIDATE_NODES.equals(numInstancesPerState)) { max = cache.getLiveInstances().size(); - } else if ("R".equals(numInstancesPerState)) { + } else if (StateModelDefinition.STATE_REPLICA_COUNT_ALL_REPLICAS.equals(numInstancesPerState)) { // idealState is null when resource has been dropped, // R can't be evaluated and ignore state constraints //if (idealState != null) { diff --git a/helix-core/src/main/java/org/apache/helix/examples/Quickstart.java b/helix-core/src/main/java/org/apache/helix/examples/Quickstart.java index 5d1df0a02c..9cc14b6039 100644 --- a/helix-core/src/main/java/org/apache/helix/examples/Quickstart.java +++ b/helix-core/src/main/java/org/apache/helix/examples/Quickstart.java @@ -124,7 +124,7 @@ private static StateModelDefinition defineStateModel() { builder.upperBound(LEADER, 1); // dynamic constraint, R means it should be derived based on the replication // factor. 
- builder.dynamicUpperBound(STANDBY, "R"); + builder.dynamicUpperBound(STANDBY, StateModelDefinition.STATE_REPLICA_COUNT_ALL_REPLICAS); StateModelDefinition statemodelDefinition = builder.build(); return statemodelDefinition; diff --git a/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java b/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java index 7a0fe6377e..8a8d13b7c0 100644 --- a/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java +++ b/helix-core/src/main/java/org/apache/helix/manager/zk/ZKHelixAdmin.java @@ -2103,12 +2103,13 @@ void rebalance(String clusterName, String resourceName, int replica, String keyP throw new HelixException("Invalid or unsupported state model definition"); } masterStateValue = state; - } else if (count.equalsIgnoreCase("R")) { + } else if (count.equalsIgnoreCase(StateModelDefinition.STATE_REPLICA_COUNT_ALL_REPLICAS)) { if (slaveStateValue != null) { throw new HelixException("Invalid or unsupported state model definition"); } slaveStateValue = state; - } else if (count.equalsIgnoreCase("N")) { + } else if (count.equalsIgnoreCase( + StateModelDefinition.STATE_REPLICA_COUNT_ALL_CANDIDATE_NODES)) { if (!(masterStateValue == null && slaveStateValue == null)) { throw new HelixException("Invalid or unsupported state model definition"); } diff --git a/helix-core/src/main/java/org/apache/helix/model/LeaderStandbySMD.java b/helix-core/src/main/java/org/apache/helix/model/LeaderStandbySMD.java index e7c92a9eab..0d400817cd 100644 --- a/helix-core/src/main/java/org/apache/helix/model/LeaderStandbySMD.java +++ b/helix-core/src/main/java/org/apache/helix/model/LeaderStandbySMD.java @@ -68,7 +68,8 @@ public static StateModelDefinition build() { // bounds builder.upperBound(States.LEADER.name(), 1); - builder.dynamicUpperBound(States.STANDBY.name(), "R"); + builder.dynamicUpperBound(States.STANDBY.name(), + StateModelDefinition.STATE_REPLICA_COUNT_ALL_REPLICAS); return builder.build(); } @@ 
-97,7 +98,7 @@ public static ZNRecord generateConfigForLeaderStandby() { record.setMapField(key, metadata); } if (state.equals("STANDBY")) { - metadata.put("count", "R"); + metadata.put("count", StateModelDefinition.STATE_REPLICA_COUNT_ALL_REPLICAS); record.setMapField(key, metadata); } if (state.equals("OFFLINE")) { diff --git a/helix-core/src/main/java/org/apache/helix/model/MasterSlaveSMD.java b/helix-core/src/main/java/org/apache/helix/model/MasterSlaveSMD.java index 02900a27ac..09b06b27a7 100644 --- a/helix-core/src/main/java/org/apache/helix/model/MasterSlaveSMD.java +++ b/helix-core/src/main/java/org/apache/helix/model/MasterSlaveSMD.java @@ -69,7 +69,8 @@ public static StateModelDefinition build() { // bounds builder.upperBound(States.MASTER.name(), 1); - builder.dynamicUpperBound(States.SLAVE.name(), "R"); + builder.dynamicUpperBound(States.SLAVE.name(), + StateModelDefinition.STATE_REPLICA_COUNT_ALL_REPLICAS); return builder.build(); } @@ -98,7 +99,7 @@ public static ZNRecord generateConfigForMasterSlave() { metadata.put("count", "1"); record.setMapField(key, metadata); } else if (state.equals("SLAVE")) { - metadata.put("count", "R"); + metadata.put("count", StateModelDefinition.STATE_REPLICA_COUNT_ALL_REPLICAS); record.setMapField(key, metadata); } else if (state.equals("OFFLINE")) { metadata.put("count", "-1"); diff --git a/helix-core/src/main/java/org/apache/helix/model/OnlineOfflineSMD.java b/helix-core/src/main/java/org/apache/helix/model/OnlineOfflineSMD.java index 3f3759d8d6..fd97c7ba9f 100644 --- a/helix-core/src/main/java/org/apache/helix/model/OnlineOfflineSMD.java +++ b/helix-core/src/main/java/org/apache/helix/model/OnlineOfflineSMD.java @@ -63,7 +63,8 @@ public static StateModelDefinition build() { builder.addTransition(States.OFFLINE.name(), HelixDefinedState.DROPPED.name()); // bounds - builder.dynamicUpperBound(States.ONLINE.name(), "R"); + builder.dynamicUpperBound(States.ONLINE.name(), + 
StateModelDefinition.STATE_REPLICA_COUNT_ALL_REPLICAS); return builder.build(); } @@ -87,7 +88,7 @@ public static ZNRecord generateConfigForOnlineOffline() { String key = state + ".meta"; Map metadata = new HashMap(); if (state.equals("ONLINE")) { - metadata.put("count", "R"); + metadata.put("count", StateModelDefinition.STATE_REPLICA_COUNT_ALL_REPLICAS); record.setMapField(key, metadata); } if (state.equals("OFFLINE")) { diff --git a/helix-core/src/main/java/org/apache/helix/model/OnlineOfflineWithBootstrapSMD.java b/helix-core/src/main/java/org/apache/helix/model/OnlineOfflineWithBootstrapSMD.java index 90ccbde4ae..58acf02a22 100644 --- a/helix-core/src/main/java/org/apache/helix/model/OnlineOfflineWithBootstrapSMD.java +++ b/helix-core/src/main/java/org/apache/helix/model/OnlineOfflineWithBootstrapSMD.java @@ -67,7 +67,8 @@ public static OnlineOfflineWithBootstrapSMD build() { builder.addTransition(States.OFFLINE.name(), HelixDefinedState.DROPPED.name()); // bounds - builder.dynamicUpperBound(States.ONLINE.name(), "R"); + builder.dynamicUpperBound(States.ONLINE.name(), + StateModelDefinition.STATE_REPLICA_COUNT_ALL_REPLICAS); return new OnlineOfflineWithBootstrapSMD(builder.build().getRecord()); } diff --git a/helix-core/src/main/java/org/apache/helix/model/StateModelDefinition.java b/helix-core/src/main/java/org/apache/helix/model/StateModelDefinition.java index 9570dfeb3e..fcf24fb305 100644 --- a/helix-core/src/main/java/org/apache/helix/model/StateModelDefinition.java +++ b/helix-core/src/main/java/org/apache/helix/model/StateModelDefinition.java @@ -49,6 +49,8 @@ public enum StateModelDefinitionProperty { } public static final int TOP_STATE_PRIORITY = 1; + public static final String STATE_REPLICA_COUNT_ALL_CANDIDATE_NODES = "N"; + public static final String STATE_REPLICA_COUNT_ALL_REPLICAS = "R"; /** * state model's initial state @@ -200,7 +202,7 @@ public String getInitialState() { /** * Number of instances that can be in each state * @param state the state 
name - * @return maximum instance count per state, can be "N" or "R" + * @return maximum instance count per state, can be STATE_REPLICA_COUNT_ALL_CANDIDATE_NODES or STATE_REPLICA_COUNT_ALL_REPLICAS */ public String getNumInstancesPerState(String state) { return _statesCountMap.get(state); @@ -449,11 +451,11 @@ public LinkedHashMap getStateCountMap(int candidateNodeNum, int if (candidateNodeNum <= 0) { break; } - if ("N".equals(num)) { + if (STATE_REPLICA_COUNT_ALL_CANDIDATE_NODES.equals(num)) { stateCountMap.put(state, candidateNodeNum); replicas -= candidateNodeNum; break; - } else if ("R".equals(num)) { + } else if (STATE_REPLICA_COUNT_ALL_REPLICAS.equals(num)) { // wait until we get the counts for all other states continue; } else { @@ -475,7 +477,7 @@ public LinkedHashMap getStateCountMap(int candidateNodeNum, int // get state count for R for (String state : statesPriorityList) { String num = getNumInstancesPerState(state); - if ("R".equals(num)) { + if (STATE_REPLICA_COUNT_ALL_REPLICAS.equals(num)) { if (candidateNodeNum > 0 && replicas > 0) { stateCountMap.put(state, replicas < candidateNodeNum ? 
replicas : candidateNodeNum); } diff --git a/helix-core/src/main/java/org/apache/helix/model/StorageSchemataSMD.java b/helix-core/src/main/java/org/apache/helix/model/StorageSchemataSMD.java index ea3fb4d9ff..c19e3c44d0 100644 --- a/helix-core/src/main/java/org/apache/helix/model/StorageSchemataSMD.java +++ b/helix-core/src/main/java/org/apache/helix/model/StorageSchemataSMD.java @@ -63,7 +63,8 @@ public static StateModelDefinition build() { builder.addTransition(States.OFFLINE.name(), HelixDefinedState.DROPPED.name()); // bounds - builder.dynamicUpperBound(States.MASTER.name(), "N"); + builder.dynamicUpperBound(States.MASTER.name(), + StateModelDefinition.STATE_REPLICA_COUNT_ALL_CANDIDATE_NODES); return builder.build(); } @@ -88,7 +89,7 @@ public static ZNRecord generateConfigForStorageSchemata() { String key = state + ".meta"; Map metadata = new HashMap(); if (state.equals("MASTER")) { - metadata.put("count", "N"); + metadata.put("count", StateModelDefinition.STATE_REPLICA_COUNT_ALL_CANDIDATE_NODES); record.setMapField(key, metadata); } else if (state.equals("OFFLINE")) { metadata.put("count", "-1"); diff --git a/helix-core/src/main/java/org/apache/helix/model/util/StateModelDefinitionValidator.java b/helix-core/src/main/java/org/apache/helix/model/util/StateModelDefinitionValidator.java index b208efa693..7eb2047cc3 100644 --- a/helix-core/src/main/java/org/apache/helix/model/util/StateModelDefinitionValidator.java +++ b/helix-core/src/main/java/org/apache/helix/model/util/StateModelDefinitionValidator.java @@ -122,7 +122,8 @@ private boolean areStateCountsValid() { try { Integer.parseInt(count); } catch (NumberFormatException e) { - if (!count.equals("N") && !count.equals("R")) { + if (!count.equals(StateModelDefinition.STATE_REPLICA_COUNT_ALL_CANDIDATE_NODES) + && !count.equals(StateModelDefinition.STATE_REPLICA_COUNT_ALL_REPLICAS)) { _logger.error("State " + state + " has invalid count " + count + ", state model: " + _stateModelDef.getId()); return false; diff 
--git a/helix-core/src/main/java/org/apache/helix/util/RebalanceUtil.java b/helix-core/src/main/java/org/apache/helix/util/RebalanceUtil.java index 868e0cf577..5c7effb6f4 100644 --- a/helix-core/src/main/java/org/apache/helix/util/RebalanceUtil.java +++ b/helix-core/src/main/java/org/apache/helix/util/RebalanceUtil.java @@ -125,12 +125,13 @@ public static String[] parseStates(String clusterName, StateModelDefinition stat throw new HelixException("Invalid or unsupported state model definition"); } masterStateValue = state; - } else if (count.equalsIgnoreCase("R")) { + } else if (count.equalsIgnoreCase(StateModelDefinition.STATE_REPLICA_COUNT_ALL_REPLICAS)) { if (slaveStateValue != null) { throw new HelixException("Invalid or unsupported state model definition"); } slaveStateValue = state; - } else if (count.equalsIgnoreCase("N")) { + } else if (count.equalsIgnoreCase( + StateModelDefinition.STATE_REPLICA_COUNT_ALL_CANDIDATE_NODES)) { if (!(masterStateValue == null && slaveStateValue == null)) { throw new HelixException("Invalid or unsupported state model definition"); } diff --git a/helix-core/src/test/java/org/apache/helix/TestHelper.java b/helix-core/src/test/java/org/apache/helix/TestHelper.java index 79f238da77..9dbba34769 100644 --- a/helix-core/src/test/java/org/apache/helix/TestHelper.java +++ b/helix-core/src/test/java/org/apache/helix/TestHelper.java @@ -693,7 +693,7 @@ public static StateModelDefinition generateStateModelDefForBootstrap() { String key = state + ".meta"; Map metadata = new HashMap(); if (state.equals("ONLINE")) { - metadata.put("count", "R"); + metadata.put("count", StateModelDefinition.STATE_REPLICA_COUNT_ALL_REPLICAS); record.setMapField(key, metadata); } else if (state.equals("BOOTSTRAP")) { metadata.put("count", "-1"); diff --git a/helix-core/src/test/java/org/apache/helix/integration/TestPartitionLevelTransitionConstraint.java b/helix-core/src/test/java/org/apache/helix/integration/TestPartitionLevelTransitionConstraint.java index 
c4c37fe386..9805a8c086 100644 --- a/helix-core/src/test/java/org/apache/helix/integration/TestPartitionLevelTransitionConstraint.java +++ b/helix-core/src/test/java/org/apache/helix/integration/TestPartitionLevelTransitionConstraint.java @@ -230,7 +230,7 @@ private static StateModelDefinition defineStateModel() { // static constraint builder.upperBound("MASTER", 1); // dynamic constraint, R means it should be derived based on the replication factor. - builder.dynamicUpperBound("SLAVE", "R"); + builder.dynamicUpperBound("SLAVE", StateModelDefinition.STATE_REPLICA_COUNT_ALL_REPLICAS); StateModelDefinition statemodelDefinition = builder.build(); diff --git a/helix-core/src/test/java/org/apache/helix/integration/TestPreferenceListAsQueue.java b/helix-core/src/test/java/org/apache/helix/integration/TestPreferenceListAsQueue.java index 2b32c219e8..178b37a9cf 100644 --- a/helix-core/src/test/java/org/apache/helix/integration/TestPreferenceListAsQueue.java +++ b/helix-core/src/test/java/org/apache/helix/integration/TestPreferenceListAsQueue.java @@ -266,7 +266,8 @@ private StateModelDefinition createReprioritizedStateModelDef(String stateModelN .addState("ONLINE", 1).addState("OFFLINE").addState("DROPPED").addState("ERROR") .initialState("OFFLINE").addTransition("ERROR", "OFFLINE", 1) .addTransition("ONLINE", "OFFLINE", 2).addTransition("OFFLINE", "DROPPED", 3) - .addTransition("OFFLINE", "ONLINE", 4).dynamicUpperBound("ONLINE", "R") + .addTransition("OFFLINE", "ONLINE", 4) + .dynamicUpperBound("ONLINE", StateModelDefinition.STATE_REPLICA_COUNT_ALL_REPLICAS) .upperBound("OFFLINE", -1).upperBound("DROPPED", -1).upperBound("ERROR", -1); return builder.build(); } diff --git a/helix-core/src/test/java/org/apache/helix/integration/messaging/TestMessageThrottle2.java b/helix-core/src/test/java/org/apache/helix/integration/messaging/TestMessageThrottle2.java index b37493101e..b11e6350e5 100644 --- 
a/helix-core/src/test/java/org/apache/helix/integration/messaging/TestMessageThrottle2.java +++ b/helix-core/src/test/java/org/apache/helix/integration/messaging/TestMessageThrottle2.java @@ -186,7 +186,7 @@ private ZNRecord generateConfigForMasterSlave() { record.setMapField(key, metadata); break; case "SLAVE": - metadata.put("count", "R"); + metadata.put("count", StateModelDefinition.STATE_REPLICA_COUNT_ALL_REPLICAS); record.setMapField(key, metadata); break; case "OFFLINE": diff --git a/helix-core/src/test/java/org/apache/helix/model/TestStateModelValidity.java b/helix-core/src/test/java/org/apache/helix/model/TestStateModelValidity.java index f8955abbd8..724c3315db 100644 --- a/helix-core/src/test/java/org/apache/helix/model/TestStateModelValidity.java +++ b/helix-core/src/test/java/org/apache/helix/model/TestStateModelValidity.java @@ -230,7 +230,7 @@ public void testBasic() { .upperBound("MASTER", 1) // R indicates an upper bound of number of replicas for each partition - .dynamicUpperBound("SLAVE", "R") + .dynamicUpperBound("SLAVE", StateModelDefinition.STATE_REPLICA_COUNT_ALL_REPLICAS) // Add some high-priority transitions .addTransition("SLAVE", "MASTER", 1).addTransition("OFFLINE", "SLAVE", 2) From 3775e3d3dc51f4231e736ca71dcd285280cbf491 Mon Sep 17 00:00:00 2001 From: Zachary Pinto Date: Thu, 7 Dec 2023 17:46:59 -0800 Subject: [PATCH 09/11] Prevent the spectator routing table from containing SWAP_IN instances.(#2710) Prevent the spectator routing table from containing SWAP_IN instances. 
--- .../waged/model/ClusterModelProvider.java | 6 +- .../helix/spectator/RoutingDataCache.java | 70 ++++++++++- .../helix/spectator/RoutingTableProvider.java | 14 ++- .../rebalancer/TestInstanceOperation.java | 109 ++++++++++++++++++ ...RoutingTableProviderFromCurrentStates.java | 8 +- 5 files changed, 195 insertions(+), 12 deletions(-) diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/model/ClusterModelProvider.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/model/ClusterModelProvider.java index a869a904ef..69fec9b2ca 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/model/ClusterModelProvider.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/model/ClusterModelProvider.java @@ -209,13 +209,15 @@ private static ClusterModel generateClusterModel(ResourceControllerDataProvider // Get the set of active logical ids. Set activeLogicalIds = activeInstances.stream().map( - instanceName -> assignableInstanceConfigMap.get(instanceName) + instanceName -> assignableInstanceConfigMap.getOrDefault(instanceName, + new InstanceConfig(instanceName)) .getLogicalId(clusterTopologyConfig.getEndNodeType())).collect(Collectors.toSet()); Set assignableLiveInstanceNames = dataProvider.getAssignableLiveInstances().keySet(); Set assignableLiveInstanceLogicalIds = assignableLiveInstanceNames.stream().map( - instanceName -> assignableInstanceConfigMap.get(instanceName) + instanceName -> assignableInstanceConfigMap.getOrDefault(instanceName, + new InstanceConfig(instanceName)) .getLogicalId(clusterTopologyConfig.getEndNodeType())).collect(Collectors.toSet()); // Generate replica objects for all the resource partitions. 
diff --git a/helix-core/src/main/java/org/apache/helix/spectator/RoutingDataCache.java b/helix-core/src/main/java/org/apache/helix/spectator/RoutingDataCache.java index 727bd8df9a..8872e9edac 100644 --- a/helix-core/src/main/java/org/apache/helix/spectator/RoutingDataCache.java +++ b/helix-core/src/main/java/org/apache/helix/spectator/RoutingDataCache.java @@ -23,8 +23,10 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.stream.Collectors; import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; import org.apache.helix.HelixConstants; import org.apache.helix.HelixDataAccessor; import org.apache.helix.HelixException; @@ -34,9 +36,11 @@ import org.apache.helix.common.caches.CurrentStateSnapshot; import org.apache.helix.common.caches.CustomizedViewCache; import org.apache.helix.common.caches.TargetExternalViewCache; +import org.apache.helix.constants.InstanceConstants; import org.apache.helix.model.CurrentState; import org.apache.helix.model.CustomizedView; import org.apache.helix.model.ExternalView; +import org.apache.helix.model.InstanceConfig; import org.apache.helix.model.LiveInstance; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -47,6 +51,10 @@ class RoutingDataCache extends BasicClusterDataCache { private static Logger LOG = LoggerFactory.getLogger(RoutingDataCache.class.getName()); + // When an instance has any of these instance operations, it should not be routable. + private static final ImmutableSet NON_ROUTABLE_INSTANCE_OPERATIONS = + ImmutableSet.of(InstanceConstants.InstanceOperation.SWAP_IN.name()); + private final Map> _sourceDataTypeMap; private CurrentStateCache _currentStateCache; @@ -54,6 +62,8 @@ class RoutingDataCache extends BasicClusterDataCache { // propertyCache, this hardcoded list of fields won't be necessary. 
 private Map _customizedViewCaches; private TargetExternalViewCache _targetExternalViewCache; + private Map _routableLiveInstanceMap; + private Map _routableInstanceConfigMap; public RoutingDataCache(String clusterName, PropertyType sourceDataType) { this (clusterName, ImmutableMap.of(sourceDataType, Collections.emptyList())); @@ -73,6 +83,8 @@ public RoutingDataCache(String clusterName, Map> sour .forEach(customizedStateType -> _customizedViewCaches.put(customizedStateType, new CustomizedViewCache(clusterName, customizedStateType))); _targetExternalViewCache = new TargetExternalViewCache(clusterName); + _routableInstanceConfigMap = new HashMap<>(); + _routableLiveInstanceMap = new HashMap<>(); requireFullRefresh(); } @@ -88,7 +100,26 @@ public synchronized void refresh(HelixDataAccessor accessor) { LOG.info("START: RoutingDataCache.refresh() for cluster " + _clusterName); long startTime = System.currentTimeMillis(); + // Store whether a refresh for routable instances is necessary, as the super.refresh() call will + // set the _propertyDataChangedMap values for the instance config and live instance change types to false. + boolean refreshRoutableInstanceConfigs = + _propertyDataChangedMap.getOrDefault(HelixConstants.ChangeType.INSTANCE_CONFIG, false); + // If there is an InstanceConfig change, update the routable instance configs and live instances. + // Must also do live instances because whether an instance is routable is based off of the instance config. 
+ boolean refreshRoutableLiveInstances = + _propertyDataChangedMap.getOrDefault(HelixConstants.ChangeType.LIVE_INSTANCE, false) + || refreshRoutableInstanceConfigs; + super.refresh(accessor); + + if (refreshRoutableInstanceConfigs) { + updateRoutableInstanceConfigMap(_instanceConfigPropertyCache.getPropertyMap()); + } + if (refreshRoutableLiveInstances) { + updateRoutableLiveInstanceMap(getRoutableInstanceConfigMap(), + _liveInstancePropertyCache.getPropertyMap()); + } + for (PropertyType propertyType : _sourceDataTypeMap.keySet()) { long start = System.currentTimeMillis(); switch (propertyType) { @@ -114,7 +145,9 @@ public synchronized void refresh(HelixDataAccessor accessor) { * TODO: logic. **/ _liveInstancePropertyCache.refresh(accessor); - Map liveInstanceMap = getLiveInstances(); + updateRoutableLiveInstanceMap(getRoutableInstanceConfigMap(), + _liveInstancePropertyCache.getPropertyMap()); + Map liveInstanceMap = getRoutableLiveInstances(); _currentStateCache.refresh(accessor, liveInstanceMap); LOG.info("Reload CurrentStates. 
Takes " + (System.currentTimeMillis() - start) + " ms"); } @@ -150,6 +183,41 @@ public synchronized void refresh(HelixDataAccessor accessor) { } } + private void updateRoutableInstanceConfigMap(Map instanceConfigMap) { + _routableInstanceConfigMap = instanceConfigMap.entrySet().stream().filter( + (instanceConfigEntry) -> !NON_ROUTABLE_INSTANCE_OPERATIONS.contains( + instanceConfigEntry.getValue().getInstanceOperation())) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + } + + private void updateRoutableLiveInstanceMap(Map instanceConfigMap, + Map liveInstanceMap) { + _routableLiveInstanceMap = liveInstanceMap.entrySet().stream().filter( + (liveInstanceEntry) -> instanceConfigMap.containsKey(liveInstanceEntry.getKey()) + && !NON_ROUTABLE_INSTANCE_OPERATIONS.contains( + instanceConfigMap.get(liveInstanceEntry.getKey()).getInstanceOperation())) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + } + + /** + * Returns the LiveInstances for each of the routable instances that are currently up and + * running. + * + * @return a map of LiveInstances + */ + public Map getRoutableLiveInstances() { + return Collections.unmodifiableMap(_routableLiveInstanceMap); + } + + /** + * Returns the instance config map for all the routable instances that are in the cluster. 
+ * + * @return a map of InstanceConfigs + */ + public Map getRoutableInstanceConfigMap() { + return Collections.unmodifiableMap(_routableInstanceConfigMap); + } + /** * Retrieves the TargetExternalView for all resources * diff --git a/helix-core/src/main/java/org/apache/helix/spectator/RoutingTableProvider.java b/helix-core/src/main/java/org/apache/helix/spectator/RoutingTableProvider.java index 0d97c9fec3..c27f084627 100644 --- a/helix-core/src/main/java/org/apache/helix/spectator/RoutingTableProvider.java +++ b/helix-core/src/main/java/org/apache/helix/spectator/RoutingTableProvider.java @@ -923,14 +923,16 @@ protected void handleEvent(ClusterEvent event) { case EXTERNALVIEW: { String keyReference = generateReferenceKey(propertyType.name(), DEFAULT_STATE_TYPE); refreshExternalView(_dataCache.getExternalViews().values(), - _dataCache.getInstanceConfigMap().values(), _dataCache.getLiveInstances().values(), + _dataCache.getRoutableInstanceConfigMap().values(), + _dataCache.getRoutableLiveInstances().values(), keyReference); } break; case TARGETEXTERNALVIEW: { String keyReference = generateReferenceKey(propertyType.name(), DEFAULT_STATE_TYPE); refreshExternalView(_dataCache.getTargetExternalViews().values(), - _dataCache.getInstanceConfigMap().values(), _dataCache.getLiveInstances().values(), + _dataCache.getRoutableInstanceConfigMap().values(), + _dataCache.getRoutableLiveInstances().values(), keyReference); } break; @@ -938,13 +940,15 @@ protected void handleEvent(ClusterEvent event) { for (String customizedStateType : _sourceDataTypeMap.getOrDefault(PropertyType.CUSTOMIZEDVIEW, Collections.emptyList())) { String keyReference = generateReferenceKey(propertyType.name(), customizedStateType); refreshCustomizedView(_dataCache.getCustomizedView(customizedStateType).values(), - _dataCache.getInstanceConfigMap().values(), _dataCache.getLiveInstances().values(), keyReference); + _dataCache.getRoutableInstanceConfigMap().values(), + 
_dataCache.getRoutableLiveInstances().values(), keyReference); } break; case CURRENTSTATES: { String keyReference = generateReferenceKey(propertyType.name(), DEFAULT_STATE_TYPE);; - refreshCurrentState(_dataCache.getCurrentStatesMap(), _dataCache.getInstanceConfigMap().values(), - _dataCache.getLiveInstances().values(), keyReference); + refreshCurrentState(_dataCache.getCurrentStatesMap(), + _dataCache.getRoutableInstanceConfigMap().values(), + _dataCache.getRoutableLiveInstances().values(), keyReference); recordPropagationLatency(System.currentTimeMillis(), _dataCache.getCurrentStateSnapshot()); } break; diff --git a/helix-core/src/test/java/org/apache/helix/integration/rebalancer/TestInstanceOperation.java b/helix-core/src/test/java/org/apache/helix/integration/rebalancer/TestInstanceOperation.java index b7c90d8412..3f0aa5d9ec 100644 --- a/helix-core/src/test/java/org/apache/helix/integration/rebalancer/TestInstanceOperation.java +++ b/helix-core/src/test/java/org/apache/helix/integration/rebalancer/TestInstanceOperation.java @@ -1,6 +1,8 @@ package org.apache.helix.integration.rebalancer; import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; import java.util.Collections; import java.util.Date; import java.util.HashMap; @@ -16,8 +18,12 @@ import org.apache.helix.HelixAdmin; import org.apache.helix.HelixDataAccessor; import org.apache.helix.HelixException; +import org.apache.helix.HelixManager; +import org.apache.helix.HelixManagerFactory; import org.apache.helix.HelixRollbackException; +import org.apache.helix.InstanceType; import org.apache.helix.NotificationContext; +import org.apache.helix.PropertyType; import org.apache.helix.TestHelper; import org.apache.helix.common.ZkTestBase; import org.apache.helix.constants.InstanceConstants; @@ -41,10 +47,12 @@ import org.apache.helix.participant.statemachine.StateModelFactory; import org.apache.helix.participant.statemachine.StateModelInfo; import 
org.apache.helix.participant.statemachine.Transition; +import org.apache.helix.spectator.RoutingTableProvider; import org.apache.helix.tools.ClusterVerifiers.BestPossibleExternalViewVerifier; import org.apache.helix.tools.ClusterVerifiers.StrictMatchExternalViewVerifier; import org.apache.helix.tools.ClusterVerifiers.ZkHelixClusterVerifier; import org.testng.Assert; +import org.testng.annotations.AfterClass; import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; @@ -70,6 +78,10 @@ public class TestInstanceOperation extends ZkTestBase { ImmutableSet.of("MASTER", "LEADER", "SLAVE", "STANDBY"); private int REPLICA = 3; protected ClusterControllerManager _controller; + private HelixManager _spectator; + private RoutingTableProvider _routingTableProviderDefault; + private RoutingTableProvider _routingTableProviderEV; + private RoutingTableProvider _routingTableProviderCS; List _participants = new ArrayList<>(); private List _originalParticipantNames = new ArrayList<>(); List _participantNames = new ArrayList<>(); @@ -113,6 +125,15 @@ public void beforeClass() throws Exception { _configAccessor = new ConfigAccessor(_gZkClient); _dataAccessor = new ZKHelixDataAccessor(CLUSTER_NAME, _baseAccessor); + // start spectator + _spectator = + HelixManagerFactory.getZKHelixManager(CLUSTER_NAME, "spectator", InstanceType.SPECTATOR, + ZK_ADDR); + _spectator.connect(); + _routingTableProviderDefault = new RoutingTableProvider(_spectator); + _routingTableProviderEV = new RoutingTableProvider(_spectator, PropertyType.EXTERNALVIEW); + _routingTableProviderCS = new RoutingTableProvider(_spectator, PropertyType.CURRENTSTATES); + setupClusterConfig(); createTestDBs(DEFAULT_RESOURCE_DELAY_TIME); @@ -122,6 +143,18 @@ public void beforeClass() throws Exception { _admin = new ZKHelixAdmin(_gZkClient); } + @AfterClass + public void afterClass() { + for (MockParticipantManager p : _participants) { + p.syncStop(); + } + _controller.syncStop(); + 
_routingTableProviderDefault.shutdown(); + _routingTableProviderEV.shutdown(); + _routingTableProviderCS.shutdown(); + _spectator.disconnect(); + } + private void setupClusterConfig() { _stateModelDelay = 3L; ClusterConfig clusterConfig = _configAccessor.getClusterConfig(CLUSTER_NAME); @@ -696,12 +729,21 @@ public void testNodeSwap() throws Exception { // Assert canSwapBeCompleted is true Assert.assertTrue(_gSetupTool.getClusterManagementTool() .canCompleteSwap(CLUSTER_NAME, instanceToSwapOutName)); + + // Validate that the SWAP_OUT instance is in routing tables and SWAP_IN is not. + validateRoutingTablesInstance(getEVs(), instanceToSwapOutName, true); + validateRoutingTablesInstance(getEVs(), instanceToSwapInName, false); + // Assert completeSwapIfPossible is true Assert.assertTrue(_gSetupTool.getClusterManagementTool() .completeSwapIfPossible(CLUSTER_NAME, instanceToSwapOutName)); Assert.assertTrue(_clusterVerifier.verifyByPolling()); + // Validate that the SWAP_IN instance is now in the routing tables. + validateRoutingTablesInstance(getEVs(), instanceToSwapInName, true); + + // Assert that SWAP_OUT instance is disabled and has no partitions assigned to it. Assert.assertFalse(_gSetupTool.getClusterManagementTool() .getInstanceConfig(CLUSTER_NAME, instanceToSwapOutName).getInstanceEnabled()); @@ -760,6 +802,10 @@ public void testNodeSwapSwapInNodeNoInstanceOperationDisabled() throws Exception validateEVsCorrect(getEVs(), originalEVs, swapOutInstancesToSwapInInstances, Set.of(instanceToSwapInName), Collections.emptySet()); + // Validate that the SWAP_OUT instance is in routing tables and SWAP_IN is not. 
+ validateRoutingTablesInstance(getEVs(), instanceToSwapOutName, true); + validateRoutingTablesInstance(getEVs(), instanceToSwapInName, false); + // Assert canSwapBeCompleted is true Assert.assertTrue(_gSetupTool.getClusterManagementTool() .canCompleteSwap(CLUSTER_NAME, instanceToSwapOutName)); @@ -821,6 +867,10 @@ public void testNodeSwapCancelSwapWhenReadyToComplete() throws Exception { validateEVsCorrect(getEVs(), originalEVs, swapOutInstancesToSwapInInstances, Set.of(instanceToSwapInName), Collections.emptySet()); + // Validate that the SWAP_OUT instance is in routing tables and SWAP_IN is not. + validateRoutingTablesInstance(getEVs(), instanceToSwapOutName, true); + validateRoutingTablesInstance(getEVs(), instanceToSwapInName, false); + // Assert canSwapBeCompleted is true Assert.assertTrue(_gSetupTool.getClusterManagementTool() .canCompleteSwap(CLUSTER_NAME, instanceToSwapOutName)); @@ -832,6 +882,10 @@ public void testNodeSwapCancelSwapWhenReadyToComplete() throws Exception { // Wait for cluster to converge. Assert.assertTrue(_clusterVerifier.verifyByPolling()); + // Validate that the SWAP_OUT instance is in routing tables and SWAP_IN is not. + validateRoutingTablesInstance(getEVs(), instanceToSwapOutName, true); + validateRoutingTablesInstance(getEVs(), instanceToSwapInName, false); + // Validate there are no partitions on the SWAP_IN instance. Assert.assertEquals(getPartitionsAndStatesOnInstance(getEVs(), instanceToSwapInName).size(), 0); @@ -905,6 +959,10 @@ public void testNodeSwapAfterEMM() throws Exception { validateEVsCorrect(getEVs(), originalEVs, swapOutInstancesToSwapInInstances, Set.of(instanceToSwapInName), Collections.emptySet()); + // Validate that the SWAP_OUT instance is in routing tables and SWAP_IN is not. 
+ validateRoutingTablesInstance(getEVs(), instanceToSwapOutName, true); + validateRoutingTablesInstance(getEVs(), instanceToSwapInName, false); + // Assert canSwapBeCompleted is true Assert.assertTrue(_gSetupTool.getClusterManagementTool() .canCompleteSwap(CLUSTER_NAME, instanceToSwapOutName)); @@ -914,6 +972,9 @@ public void testNodeSwapAfterEMM() throws Exception { Assert.assertTrue(_clusterVerifier.verifyByPolling()); + // Validate that the SWAP_IN instance is now in the routing tables. + validateRoutingTablesInstance(getEVs(), instanceToSwapInName, true); + // Assert that SWAP_OUT instance is disabled and has no partitions assigned to it. Assert.assertFalse(_gSetupTool.getClusterManagementTool() .getInstanceConfig(CLUSTER_NAME, instanceToSwapOutName).getInstanceEnabled()); @@ -1116,6 +1177,10 @@ public void testNodeSwapAddSwapInFirst() { validateEVsCorrect(getEVs(), originalEVs, swapOutInstancesToSwapInInstances, Set.of(instanceToSwapInName), Collections.emptySet()); + // Validate that the SWAP_OUT instance is in routing tables and SWAP_IN is not. + validateRoutingTablesInstance(getEVs(), instanceToSwapOutName, true); + validateRoutingTablesInstance(getEVs(), instanceToSwapInName, false); + // Assert canSwapBeCompleted is true Assert.assertTrue(_gSetupTool.getClusterManagementTool() .canCompleteSwap(CLUSTER_NAME, instanceToSwapOutName)); @@ -1125,6 +1190,9 @@ public void testNodeSwapAddSwapInFirst() { Assert.assertTrue(_clusterVerifier.verifyByPolling()); + // Validate that the SWAP_IN instance is now in the routing tables. + validateRoutingTablesInstance(getEVs(), instanceToSwapInName, true); + // Assert that SWAP_OUT instance is disabled and has no partitions assigned to it. 
Assert.assertFalse(_gSetupTool.getClusterManagementTool() .getInstanceConfig(CLUSTER_NAME, instanceToSwapOutName).getInstanceEnabled()); @@ -1246,6 +1314,47 @@ private Map getPartitionsAndStatesOnInstance(Map> getResourcePartitionStateOnInstance( + Map evs, String instanceName) { + Map> stateByPartitionByResource = new HashMap<>(); + for (String resourceEV : evs.keySet()) { + for (String partition : evs.get(resourceEV).getPartitionSet()) { + if (evs.get(resourceEV).getStateMap(partition).containsKey(instanceName)) { + if (!stateByPartitionByResource.containsKey(resourceEV)) { + stateByPartitionByResource.put(resourceEV, new HashMap<>()); + } + stateByPartitionByResource.get(resourceEV) + .put(partition, evs.get(resourceEV).getStateMap(partition).get(instanceName)); + } + } + } + + return stateByPartitionByResource; + } + + private Set getInstanceNames(Collection instanceConfigs) { + return instanceConfigs.stream().map(InstanceConfig::getInstanceName) + .collect(Collectors.toSet()); + } + + private void validateRoutingTablesInstance(Map evs, String instanceName, + boolean shouldContain) { + RoutingTableProvider[] routingTableProviders = + new RoutingTableProvider[]{_routingTableProviderDefault, _routingTableProviderEV, _routingTableProviderCS}; + getResourcePartitionStateOnInstance(evs, instanceName).forEach((resource, partitions) -> { + partitions.forEach((partition, state) -> { + Arrays.stream(routingTableProviders).forEach(rtp -> Assert.assertEquals( + getInstanceNames(rtp.getInstancesForResource(resource, partition, state)).contains( + instanceName), shouldContain)); + }); + }); + + Arrays.stream(routingTableProviders).forEach(rtp -> { + Assert.assertEquals(getInstanceNames(rtp.getInstanceConfigs()).contains(instanceName), + shouldContain); + }); + } + private void validateEVCorrect(ExternalView actual, ExternalView original, Map swapOutInstancesToSwapInInstances, Set inFlightSwapInInstances, Set completedSwapInInstanceNames) { diff --git 
a/helix-core/src/test/java/org/apache/helix/integration/spectator/TestRoutingTableProviderFromCurrentStates.java b/helix-core/src/test/java/org/apache/helix/integration/spectator/TestRoutingTableProviderFromCurrentStates.java index e8f4f82b2b..cbf2998604 100644 --- a/helix-core/src/test/java/org/apache/helix/integration/spectator/TestRoutingTableProviderFromCurrentStates.java +++ b/helix-core/src/test/java/org/apache/helix/integration/spectator/TestRoutingTableProviderFromCurrentStates.java @@ -263,7 +263,7 @@ private void validatePropagationLatency(PropertyType type, final long upperBound } @Test(dependsOnMethods = "testRoutingTableWithCurrentStates") - public void TestInconsistentStateEventProcessing() throws Exception { + public void testInconsistentStateEventProcessing() throws Exception { // This test requires an additional HelixManager since one of the provider event processing will // be blocked. HelixManager helixManager = HelixManagerFactory @@ -305,10 +305,10 @@ public void TestInconsistentStateEventProcessing() throws Exception { IdealState idealState = _gSetupTool.getClusterManagementTool().getResourceIdealState(CLUSTER_NAME, db); String targetPartitionName = idealState.getPartitionSet().iterator().next(); - // Wait until the routingtable is updated. + // Wait until the routing table is updated. 
BlockingCurrentStateRoutingTableProvider finalRoutingTableCS = routingTableCS; Assert.assertTrue(TestHelper.verify( - () -> finalRoutingTableCS.getInstances(db, targetPartitionName, "MASTER").size() > 0, + () -> !finalRoutingTableCS.getInstances(db, targetPartitionName, "MASTER").isEmpty(), 2000)); String targetNodeName = routingTableCS.getInstances(db, targetPartitionName, "MASTER").get(0).getInstanceName(); @@ -352,7 +352,7 @@ public void TestInconsistentStateEventProcessing() throws Exception { } } - @Test(dependsOnMethods = { "TestInconsistentStateEventProcessing" }) + @Test(dependsOnMethods = {"testInconsistentStateEventProcessing"}) public void testWithSupportSourceDataType() { new RoutingTableProvider(_manager, PropertyType.EXTERNALVIEW).shutdown(); new RoutingTableProvider(_manager, PropertyType.TARGETEXTERNALVIEW).shutdown(); From d70a9cbec5e268b7e0a07e69ebb8a5e03c5844a5 Mon Sep 17 00:00:00 2001 From: Zachary Pinto Date: Thu, 14 Dec 2023 15:49:55 -0800 Subject: [PATCH 10/11] Build Topology with only required levels (FaultZone and EndNode) (#2713) * Change all rebalancer strategies to create Topology without additional non-FaultZone or EndNode levels of the tree. This will allow for swap to work in clusters where the non-FaultZone or EndNode domain kv pairs don't directly match the swapping node. 
--- ...ractEvenDistributionRebalanceStrategy.java | 2 +- .../strategy/CrushRebalanceStrategy.java | 2 +- .../MultiRoundCrushRebalanceStrategy.java | 2 +- .../rebalancer/topology/Topology.java | 42 ++++++++++++++-- .../waged/model/ClusterModelProvider.java | 5 +- .../rebalancer/TestInstanceOperation.java | 48 ++++++++++--------- 6 files changed, 72 insertions(+), 29 deletions(-) diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/strategy/AbstractEvenDistributionRebalanceStrategy.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/strategy/AbstractEvenDistributionRebalanceStrategy.java index 7750bd70b0..c10a6a9ab2 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/strategy/AbstractEvenDistributionRebalanceStrategy.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/strategy/AbstractEvenDistributionRebalanceStrategy.java @@ -117,7 +117,7 @@ private ZNRecord computeBestPartitionAssignment(List allNodes, List> finalPartitionMap = null; Topology allNodeTopo = new Topology(allNodes, allNodes, clusterData.getAssignableInstanceConfigMap(), - clusterData.getClusterConfig()); + clusterData.getClusterConfig(), true); // Transform current assignment to instance->partitions map, and get total partitions Map> nodeToPartitionMap = convertPartitionMap(origPartitionMap, allNodeTopo); diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/strategy/CrushRebalanceStrategy.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/strategy/CrushRebalanceStrategy.java index 08bbaaffa0..011da67772 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/strategy/CrushRebalanceStrategy.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/strategy/CrushRebalanceStrategy.java @@ -77,7 +77,7 @@ public ZNRecord computePartitionAssignment(final List allNodes, ResourceControllerDataProvider clusterData) throws HelixException { 
Map instanceConfigMap = clusterData.getAssignableInstanceConfigMap(); _clusterTopo = - new Topology(allNodes, liveNodes, instanceConfigMap, clusterData.getClusterConfig()); + new Topology(allNodes, liveNodes, instanceConfigMap, clusterData.getClusterConfig(), true); Node topNode = _clusterTopo.getRootNode(); // for log only diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/strategy/MultiRoundCrushRebalanceStrategy.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/strategy/MultiRoundCrushRebalanceStrategy.java index a53257f3dd..96ddfa485d 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/strategy/MultiRoundCrushRebalanceStrategy.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/strategy/MultiRoundCrushRebalanceStrategy.java @@ -84,7 +84,7 @@ public ZNRecord computePartitionAssignment(final List allNodes, ResourceControllerDataProvider clusterData) throws HelixException { Map instanceConfigMap = clusterData.getAssignableInstanceConfigMap(); _clusterTopo = - new Topology(allNodes, liveNodes, instanceConfigMap, clusterData.getClusterConfig()); + new Topology(allNodes, liveNodes, instanceConfigMap, clusterData.getClusterConfig(), true); Node root = _clusterTopo.getRootNode(); Map> zoneMapping = new HashMap<>(); diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/topology/Topology.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/topology/Topology.java index 4d4ebabd11..335c30fdf2 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/topology/Topology.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/topology/Topology.java @@ -23,6 +23,7 @@ import java.security.NoSuchAlgorithmException; import java.util.ArrayList; import java.util.Collections; +import java.util.HashSet; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -57,8 +58,19 @@ public enum 
Types { private final Map _instanceConfigMap; private final ClusterTopologyConfig _clusterTopologyConfig; + /** + * Create a Topology for a cluster. + * + * @param allNodes allNodes of the given cluster. + * @param liveNodes liveNodes of the given cluster. + * @param instanceConfigMap instanceConfigMap of the given cluster. + * @param clusterConfig clusterConfig of the given cluster. + * @param faultZoneLevelOnly whether to include additional non-faultZone level nodes in the + * topology tree above the end-nodes. + */ public Topology(final List allNodes, final List liveNodes, - final Map instanceConfigMap, ClusterConfig clusterConfig) { + final Map instanceConfigMap, ClusterConfig clusterConfig, + boolean faultZoneLevelOnly) { try { _md = MessageDigest.getInstance("SHA-1"); } catch (NoSuchAlgorithmException ex) { @@ -73,7 +85,20 @@ public Topology(final List allNodes, final List liveNodes, _allInstances.removeAll(_instanceConfigMap.keySet()))); } _clusterTopologyConfig = ClusterTopologyConfig.createFromClusterConfig(clusterConfig); - _root = createClusterTree(clusterConfig); + _root = createClusterTree(clusterConfig, faultZoneLevelOnly); + } + + /** + * Create a Topology for a cluster. faultZoneLevelOnly is set to false by default. + * + * @param allNodes allNodes of the given cluster. + * @param liveNodes liveNodes of the given cluster. + * @param instanceConfigMap instanceConfigMap of the given cluster. + * @param clusterConfig clusterConfig of the given cluster. 
+ */ + public Topology(final List allNodes, final List liveNodes, + final Map instanceConfigMap, ClusterConfig clusterConfig) { + this(allNodes, liveNodes, instanceConfigMap, clusterConfig, false); } public String getEndNodeType() { @@ -149,13 +174,18 @@ private static Node cloneTree(Node root, Map newNodeWeight, return newRoot; } - private Node createClusterTree(ClusterConfig clusterConfig) { + private Node createClusterTree(ClusterConfig clusterConfig, boolean faultZoneLevelOnly) { // root Node root = new Node(); root.setName("root"); root.setId(computeId("root")); root.setType(Types.ROOT.name()); + Set unnecessaryTopoKeys = + new HashSet<>(_clusterTopologyConfig.getTopologyKeyDefaultValue().keySet()); + unnecessaryTopoKeys.remove(_clusterTopologyConfig.getFaultZoneType()); + unnecessaryTopoKeys.remove(_clusterTopologyConfig.getEndNodeType()); + // TODO: Currently we add disabled instance to the topology tree. Since they are not considered // TODO: in rebalance, maybe we should skip adding them to the tree for consistence. for (String instanceName : _allInstances) { @@ -167,6 +197,12 @@ private Node createClusterTree(ClusterConfig clusterConfig) { if (weight < 0 || weight == InstanceConfig.WEIGHT_NOT_SET) { weight = DEFAULT_NODE_WEIGHT; } + + if (faultZoneLevelOnly) { + // Remove unnecessary keys from the topology map. We do not need to use these to build more layers in + // the topology tree. The topology tree only requires FaultZoneType and EndNodeType. 
+ unnecessaryTopoKeys.forEach(instanceTopologyMap::remove); + } addEndNode(root, instanceName, instanceTopologyMap, weight, _liveInstances); } catch (IllegalArgumentException e) { if (InstanceValidationUtil.isInstanceEnabled(insConfig, clusterConfig)) { diff --git a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/model/ClusterModelProvider.java b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/model/ClusterModelProvider.java index 69fec9b2ca..dc825f2f78 100644 --- a/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/model/ClusterModelProvider.java +++ b/helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/model/ClusterModelProvider.java @@ -213,7 +213,10 @@ private static ClusterModel generateClusterModel(ResourceControllerDataProvider new InstanceConfig(instanceName)) .getLogicalId(clusterTopologyConfig.getEndNodeType())).collect(Collectors.toSet()); - Set assignableLiveInstanceNames = dataProvider.getAssignableLiveInstances().keySet(); + // TODO: Figure out why streaming the keySet directly in rare cases causes ConcurrentModificationException + // In theory, this should not be happening since cache refresh is at beginning of the pipeline, so could be some other reason. + // For now, we just copy the keySet to a new HashSet to avoid the exception. 
+ Set assignableLiveInstanceNames = new HashSet<>(dataProvider.getAssignableLiveInstances().keySet()); Set assignableLiveInstanceLogicalIds = assignableLiveInstanceNames.stream().map( instanceName -> assignableInstanceConfigMap.getOrDefault(instanceName, diff --git a/helix-core/src/test/java/org/apache/helix/integration/rebalancer/TestInstanceOperation.java b/helix-core/src/test/java/org/apache/helix/integration/rebalancer/TestInstanceOperation.java index 3f0aa5d9ec..7cd08d86fa 100644 --- a/helix-core/src/test/java/org/apache/helix/integration/rebalancer/TestInstanceOperation.java +++ b/helix-core/src/test/java/org/apache/helix/integration/rebalancer/TestInstanceOperation.java @@ -10,6 +10,7 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.UUID; import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.Collectors; @@ -58,7 +59,8 @@ public class TestInstanceOperation extends ZkTestBase { - protected final int NUM_NODE = 6; + private final int ZONE_COUNT = 4; + protected final int NUM_NODE = 10; protected static final int START_PORT = 12918; protected static final int PARTITIONS = 20; @@ -145,6 +147,13 @@ public void beforeClass() throws Exception { @AfterClass public void afterClass() { + // Drop all DBs + for (String db : _allDBs) { + _gSetupTool.dropResourceFromCluster(CLUSTER_NAME, db); + } + + Assert.assertTrue(_clusterVerifier.verifyByPolling()); + for (MockParticipantManager p : _participants) { p.syncStop(); } @@ -208,13 +217,9 @@ private void resetInstances() { for (int i = 0; i < _participants.size(); i++) { // If instance is not connected to ZK, replace it if (!_participants.get(i).isConnected()) { - // Drop bad instance from the cluster. 
- _gSetupTool.getClusterManagementTool() - .dropInstance(CLUSTER_NAME, _gSetupTool.getClusterManagementTool().getInstanceConfig(CLUSTER_NAME, _participantNames.get(i))); - _participants.set(i, createParticipant(_participantNames.get(i), Integer.toString(i), - "zone_" + i, null, true, -1)); + // Replace the stopped participant with a new one and inherit the old instance config. + _participants.set(i, createParticipant(_participantNames.get(i))); _participants.get(i).syncStart(); - continue; } _gSetupTool.getClusterManagementTool() .setInstanceOperation(CLUSTER_NAME, _participantNames.get(i), null); @@ -1205,17 +1210,7 @@ public void testNodeSwapAddSwapInFirst() { Collections.emptySet(), Set.of(instanceToSwapInName)); } - private MockParticipantManager createParticipant(String participantName, String logicalId, String zone, - InstanceConstants.InstanceOperation instanceOperation, boolean enabled, int capacity) { - InstanceConfig config = new InstanceConfig.Builder().setDomain( - String.format("%s=%s, %s=%s, %s=%s", ZONE, zone, HOST, participantName, LOGICAL_ID, - logicalId)).setInstanceEnabled(enabled).setInstanceOperation(instanceOperation) - .build(participantName); - if (capacity >= 0) { - config.setInstanceCapacityMap(Map.of(TEST_CAPACITY_KEY, capacity)); - } - _gSetupTool.getClusterManagementTool().addInstance(CLUSTER_NAME, config); - + private MockParticipantManager createParticipant(String participantName) { // start dummy participants MockParticipantManager participant = new MockParticipantManager(ZK_ADDR, CLUSTER_NAME, participantName); StateMachineEngine stateMachine = participant.getStateMachineEngine(); @@ -1227,8 +1222,17 @@ private MockParticipantManager createParticipant(String participantName, String private void addParticipant(String participantName, String logicalId, String zone, InstanceConstants.InstanceOperation instanceOperation, boolean enabled, int capacity) { - MockParticipantManager participant = createParticipant(participantName, logicalId, 
zone, - instanceOperation, enabled, capacity); + InstanceConfig config = new InstanceConfig.Builder().setDomain( + String.format("%s=%s, %s=%s, %s=%s", ZONE, zone, HOST, participantName, LOGICAL_ID, + logicalId)).setInstanceEnabled(enabled).setInstanceOperation(instanceOperation) + .build(participantName); + + if (capacity >= 0) { + config.setInstanceCapacityMap(Map.of(TEST_CAPACITY_KEY, capacity)); + } + _gSetupTool.getClusterManagementTool().addInstance(CLUSTER_NAME, config); + + MockParticipantManager participant = createParticipant(participantName); participant.syncStart(); _participants.add(participant); @@ -1236,8 +1240,8 @@ private void addParticipant(String participantName, String logicalId, String zon } private void addParticipant(String participantName) { - addParticipant(participantName, Integer.toString(_participants.size()), - "zone_" + _participants.size(), null, true, -1); + addParticipant(participantName, UUID.randomUUID().toString(), + "zone_" + _participants.size() % ZONE_COUNT, null, true, -1); } private void createTestDBs(long delayTime) throws InterruptedException { From 5455135115926dc260d377fdaebf48a2d5d818c3 Mon Sep 17 00:00:00 2001 From: Zachary Pinto Date: Tue, 19 Dec 2023 16:37:19 -0700 Subject: [PATCH 11/11] Stabilize TestInstanceOperation (#2715) * Stabilize TestInstanceOperation. clusterVerifier is evaluating to true once the partitionAssignment matches the expected value; however, it is before the TopState is transferred back to the SWAP_IN node. This can be fixed by using TestHelper.verify to check that states converge within TIMEOUT. * Moved evacuate tests with long ST resources to the end because it was taking a long time to drop DBs which was causing flakiness in later tests. Ran TestInstanceOperation 5 times locally with success. 
--- .../rebalancer/TestInstanceOperation.java | 630 +++++++++--------- 1 file changed, 326 insertions(+), 304 deletions(-) diff --git a/helix-core/src/test/java/org/apache/helix/integration/rebalancer/TestInstanceOperation.java b/helix-core/src/test/java/org/apache/helix/integration/rebalancer/TestInstanceOperation.java index 7cd08d86fa..bf6db29008 100644 --- a/helix-core/src/test/java/org/apache/helix/integration/rebalancer/TestInstanceOperation.java +++ b/helix-core/src/test/java/org/apache/helix/integration/rebalancer/TestInstanceOperation.java @@ -52,6 +52,8 @@ import org.apache.helix.tools.ClusterVerifiers.BestPossibleExternalViewVerifier; import org.apache.helix.tools.ClusterVerifiers.StrictMatchExternalViewVerifier; import org.apache.helix.tools.ClusterVerifiers.ZkHelixClusterVerifier; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.testng.Assert; import org.testng.annotations.AfterClass; import org.testng.annotations.BeforeClass; @@ -59,9 +61,12 @@ public class TestInstanceOperation extends ZkTestBase { + private static final Logger LOG = LoggerFactory.getLogger(TestHelper.class); + public static final int TIMEOUT = 10000; private final int ZONE_COUNT = 4; - protected final int NUM_NODE = 10; + protected final int START_NUM_NODE = 10; protected static final int START_PORT = 12918; + private static int _nextStartPort = START_PORT; protected static final int PARTITIONS = 20; protected final String CLASS_NAME = getShortClassName(); @@ -85,7 +90,6 @@ public class TestInstanceOperation extends ZkTestBase { private RoutingTableProvider _routingTableProviderEV; private RoutingTableProvider _routingTableProviderCS; List _participants = new ArrayList<>(); - private List _originalParticipantNames = new ArrayList<>(); List _participantNames = new ArrayList<>(); private Set _allDBs = new HashSet<>(); private ZkHelixClusterVerifier _clusterVerifier; @@ -104,9 +108,8 @@ public void beforeClass() throws Exception { 
_gSetupTool.addCluster(CLUSTER_NAME, true); - for (int i = 0; i < NUM_NODE; i++) { - String participantName = PARTICIPANT_PREFIX + "_" + (START_PORT + i); - _originalParticipantNames.add(participantName); + for (int i = 0; i < START_NUM_NODE; i++) { + String participantName = PARTICIPANT_PREFIX + "_" + _nextStartPort; addParticipant(participantName); } @@ -171,7 +174,6 @@ private void setupClusterConfig() { clusterConfig.setDelayRebalaceEnabled(true); clusterConfig.setRebalanceDelayTime(1800000L); _configAccessor.setClusterConfig(CLUSTER_NAME, clusterConfig); - enabledTopologyAwareRebalance(); Assert.assertTrue(_clusterVerifier.verifyByPolling()); } @@ -196,34 +198,23 @@ private void disableTopologyAwareRebalance() { Assert.assertTrue(_clusterVerifier.verifyByPolling()); } - private void resetInstances() { - // Disable and drop any participants that are not in the original participant list. - Set droppedParticipants = new HashSet<>(); + private void removeOfflineOrDisabledOrSwapInInstances() { + // Remove all instances that are not live, disabled, or in SWAP_IN state. 
for (int i = 0; i < _participants.size(); i++) { String participantName = _participantNames.get(i); - if (!_originalParticipantNames.contains(participantName)) { - _gSetupTool.getClusterManagementTool().enableInstance(CLUSTER_NAME, participantName, false); - _participants.get(i).syncStop(); - _gSetupTool.getClusterManagementTool() - .dropInstance(CLUSTER_NAME, _gSetupTool.getClusterManagementTool().getInstanceConfig(CLUSTER_NAME, participantName)); - droppedParticipants.add(participantName); - } - } - - // Remove the dropped instance from _participants and _participantNames - _participantNames.removeIf(droppedParticipants::contains); - _participants.removeIf(p -> droppedParticipants.contains(p.getInstanceName())); - - for (int i = 0; i < _participants.size(); i++) { - // If instance is not connected to ZK, replace it - if (!_participants.get(i).isConnected()) { - // Replace the stopped participant with a new one and inherit the old instance config. - _participants.set(i, createParticipant(_participantNames.get(i))); - _participants.get(i).syncStart(); + InstanceConfig instanceConfig = + _gSetupTool.getClusterManagementTool().getInstanceConfig(CLUSTER_NAME, participantName); + if (!_participants.get(i).isConnected() || !instanceConfig.getInstanceEnabled() + || instanceConfig.getInstanceOperation() + .equals(InstanceConstants.InstanceOperation.SWAP_IN.name())) { + if (_participants.get(i).isConnected()) { + _participants.get(i).syncStop(); + } + _gSetupTool.getClusterManagementTool().dropInstance(CLUSTER_NAME, instanceConfig); + _participantNames.remove(i); + _participants.remove(i); + i--; } - _gSetupTool.getClusterManagementTool() - .setInstanceOperation(CLUSTER_NAME, _participantNames.get(i), null); - _gSetupTool.getClusterManagementTool().enableInstance(CLUSTER_NAME, _participantNames.get(i), true); } Assert.assertTrue(_clusterVerifier.verifyByPolling()); @@ -328,200 +319,36 @@ public void testAddingNodeWithEvacuationTag() throws Exception { } } - 
@Test(dependsOnMethods = "testAddingNodeWithEvacuationTag") - public void testEvacuateAndCancelBeforeBootstrapFinish() throws Exception { - System.out.println("START TestInstanceOperation.testEvacuateAndCancelBeforeBootstrapFinish() at " + new Date(System.currentTimeMillis())); - // add a resource where downward state transition is slow - createResourceWithDelayedRebalance(CLUSTER_NAME, "TEST_DB3_DELAYED_CRUSHED", "MasterSlave", PARTITIONS, REPLICA, - REPLICA - 1, 200000, CrushEdRebalanceStrategy.class.getName()); - _allDBs.add("TEST_DB3_DELAYED_CRUSHED"); - // add a resource where downward state transition is slow - createResourceWithWagedRebalance(CLUSTER_NAME, "TEST_DB4_DELAYED_WAGED", "MasterSlave", - PARTITIONS, REPLICA, REPLICA - 1); - _allDBs.add("TEST_DB4_DELAYED_WAGED"); - // wait for assignment to finish - Assert.assertTrue(_clusterVerifier.verifyByPolling()); - - // set bootstrap ST delay to a large number - _stateModelDelay = -10000L; - // evacuate an instance - String instanceToEvacuate = _participants.get(0).getInstanceName(); - _gSetupTool.getClusterManagementTool() - .setInstanceOperation(CLUSTER_NAME, instanceToEvacuate, InstanceConstants.InstanceOperation.EVACUATE); - // Messages should be pending at all instances besides the evacuate one - for (String participant : _participantNames) { - if (participant.equals(instanceToEvacuate)) { - continue; - } - TestHelper.verify( - () -> ((_dataAccessor.getChildNames(_dataAccessor.keyBuilder().messages(participant))).isEmpty()), 30000); - } - Assert.assertFalse(_admin.isEvacuateFinished(CLUSTER_NAME, instanceToEvacuate)); - Assert.assertFalse(_admin.isReadyForPreparingJoiningCluster(CLUSTER_NAME, instanceToEvacuate)); - - // sleep a bit so ST messages can start executing - Thread.sleep(Math.abs(_stateModelDelay / 100)); - // before we cancel, check current EV - Map assignment = getEVs(); - for (String resource : _allDBs) { - // check every replica has >= 3 partitions and a top state partition - 
validateAssignmentInEv(assignment.get(resource)); - } - - // cancel the evacuation - _gSetupTool.getClusterManagementTool() - .setInstanceOperation(CLUSTER_NAME, instanceToEvacuate, null); - - assignment = getEVs(); - for (String resource : _allDBs) { - // check every replica has >= 3 active replicas, even before cluster converge - validateAssignmentInEv(assignment.get(resource)); - } - - // check cluster converge. We have longer delay for ST then verifier timeout. It will only converge if we cancel ST. - Assert.assertTrue(_clusterVerifier.verifyByPolling()); - - // EV should contain all participants, check resources one by one - assignment = getEVs(); - for (String resource : _allDBs) { - Assert.assertTrue(getParticipantsInEv(assignment.get(resource)).containsAll(_participantNames)); - // check every replica has >= 3 active replicas again - validateAssignmentInEv(assignment.get(resource)); - } - } - - @Test(dependsOnMethods = "testEvacuateAndCancelBeforeBootstrapFinish") - public void testEvacuateAndCancelBeforeDropFinish() throws Exception { - System.out.println("START TestInstanceOperation.testEvacuateAndCancelBeforeDropFinish() at " + new Date(System.currentTimeMillis())); - - // set DROP ST delay to a large number - _stateModelDelay = 10000L; - - // evacuate an instance - String instanceToEvacuate = _participants.get(0).getInstanceName(); - _gSetupTool.getClusterManagementTool() - .setInstanceOperation(CLUSTER_NAME, instanceToEvacuate, InstanceConstants.InstanceOperation.EVACUATE); - - // message should be pending at the to evacuate participant - TestHelper.verify( - () -> ((_dataAccessor.getChildNames(_dataAccessor.keyBuilder().messages(instanceToEvacuate))).isEmpty()), 30000); - Assert.assertFalse(_admin.isEvacuateFinished(CLUSTER_NAME, instanceToEvacuate)); - - // cancel evacuation - _gSetupTool.getClusterManagementTool() - .setInstanceOperation(CLUSTER_NAME, instanceToEvacuate, null); - // check every replica has >= 3 active replicas, even before cluster 
converge - Map assignment = getEVs(); - for (String resource : _allDBs) { - validateAssignmentInEv(assignment.get(resource)); - } - - Assert.assertTrue(_clusterVerifier.verifyByPolling()); - - // EV should contain all participants, check resources one by one - assignment = getEVs(); - for (String resource : _allDBs) { - Assert.assertTrue(getParticipantsInEv(assignment.get(resource)).containsAll(_participantNames)); - // check every replica has >= 3 active replicas - validateAssignmentInEv(assignment.get(resource)); - } - } - - @Test(dependsOnMethods = "testEvacuateAndCancelBeforeDropFinish") - public void testMarkEvacuationAfterEMM() throws Exception { - System.out.println("START TestInstanceOperation.testMarkEvacuationAfterEMM() at " + new Date(System.currentTimeMillis())); - _stateModelDelay = 1000L; - Assert.assertFalse(_gSetupTool.getClusterManagementTool().isInMaintenanceMode(CLUSTER_NAME)); - _gSetupTool.getClusterManagementTool().manuallyEnableMaintenanceMode(CLUSTER_NAME, true, null, - null); - addParticipant(PARTICIPANT_PREFIX + "_" + (START_PORT + NUM_NODE)); - - - Assert.assertTrue(_clusterVerifier.verifyByPolling()); - Map assignment = getEVs(); - for (String resource : _allDBs) { - Assert.assertFalse(getParticipantsInEv(assignment.get(resource)).contains(_participantNames.get(NUM_NODE))); - } - - // set evacuate operation - String instanceToEvacuate = _participants.get(0).getInstanceName(); - _gSetupTool.getClusterManagementTool() - .setInstanceOperation(CLUSTER_NAME, instanceToEvacuate, InstanceConstants.InstanceOperation.EVACUATE); - - // there should be no evacuation happening - for (String resource : _allDBs) { - Assert.assertTrue(getParticipantsInEv(assignment.get(resource)).contains(instanceToEvacuate)); - } - - Assert.assertTrue(_clusterVerifier.verifyByPolling()); - - // exit MM - _gSetupTool.getClusterManagementTool().manuallyEnableMaintenanceMode(CLUSTER_NAME, false, null, - null); - - Assert.assertTrue(_clusterVerifier.verifyByPolling()); - 
- assignment = getEVs(); - List currentActiveInstances = - _participantNames.stream().filter(n -> !n.equals(instanceToEvacuate)).collect(Collectors.toList()); - for (String resource : _allDBs) { - validateAssignmentInEv(assignment.get(resource)); - Set newPAssignedParticipants = getParticipantsInEv(assignment.get(resource)); - Assert.assertFalse(newPAssignedParticipants.contains(instanceToEvacuate)); - Assert.assertTrue(newPAssignedParticipants.containsAll(currentActiveInstances)); - } - Assert.assertTrue(_admin.isReadyForPreparingJoiningCluster(CLUSTER_NAME, instanceToEvacuate)); - - _stateModelDelay = 3L; - } - - @Test(dependsOnMethods = "testMarkEvacuationAfterEMM") - public void testEvacuationWithOfflineInstancesInCluster() throws Exception { - System.out.println("START TestInstanceOperation.testEvacuationWithOfflineInstancesInCluster() at " + new Date(System.currentTimeMillis())); - _participants.get(1).syncStop(); - _participants.get(2).syncStop(); - - String evacuateInstanceName = _participants.get(_participants.size()-2).getInstanceName(); - _gSetupTool.getClusterManagementTool() - .setInstanceOperation(CLUSTER_NAME, evacuateInstanceName, InstanceConstants.InstanceOperation.EVACUATE); + @Test(expectedExceptions = HelixException.class, dependsOnMethods = "testAddingNodeWithEvacuationTag") + public void testNodeSwapNoTopologySetup() throws Exception { + System.out.println("START TestInstanceOperation.testNodeSwapNoTopologySetup() at " + new Date( + System.currentTimeMillis())); + removeOfflineOrDisabledOrSwapInInstances(); - Map assignment; - // EV should contain all participants, check resources one by one - assignment = getEVs(); - for (String resource : _allDBs) { - TestHelper.verify(() -> { - ExternalView ev = assignment.get(resource); - for (String partition : ev.getPartitionSet()) { - AtomicInteger activeReplicaCount = new AtomicInteger(); - ev.getStateMap(partition) - .values() - .stream() - .filter(v -> v.equals("MASTER") || v.equals("LEADER") || 
v.equals("SLAVE") || v.equals("FOLLOWER") - || v.equals("STANDBY")) - .forEach(v -> activeReplicaCount.getAndIncrement()); - if (activeReplicaCount.get() < REPLICA - 1 || (ev.getStateMap(partition).containsKey(evacuateInstanceName) - && ev.getStateMap(partition).get(evacuateInstanceName).equals("MASTER") && ev.getStateMap(partition) - .get(evacuateInstanceName) - .equals("LEADER"))) { - return false; - } - } - return true; - }, 30000); - } + // Set instance's InstanceOperation to SWAP_OUT + String instanceToSwapOutName = _participants.get(0).getInstanceName(); + _gSetupTool.getClusterManagementTool().setInstanceOperation(CLUSTER_NAME, instanceToSwapOutName, + InstanceConstants.InstanceOperation.SWAP_OUT); - resetInstances(); - dropTestDBs(ImmutableSet.of("TEST_DB3_DELAYED_CRUSHED", "TEST_DB4_DELAYED_WAGED")); + // Add instance with InstanceOperation set to SWAP_IN + // There should be an error that the logicalId does not have SWAP_OUT instance because, + // helix can't determine what topology key to use to get the logicalId if TOPOLOGY is not set. 
+ String instanceToSwapInName = PARTICIPANT_PREFIX + "_" + _nextStartPort; + InstanceConfig instanceToSwapOutInstanceConfig = _gSetupTool.getClusterManagementTool() + .getInstanceConfig(CLUSTER_NAME, instanceToSwapOutName); + addParticipant(instanceToSwapInName, instanceToSwapOutInstanceConfig.getLogicalId(LOGICAL_ID), + instanceToSwapOutInstanceConfig.getDomainAsMap().get(ZONE), + InstanceConstants.InstanceOperation.SWAP_IN, true, -1); } - @Test(expectedExceptions = HelixException.class, dependsOnMethods = "testEvacuationWithOfflineInstancesInCluster") + @Test(expectedExceptions = HelixException.class, dependsOnMethods = "testNodeSwapNoTopologySetup") public void testAddingNodeWithSwapOutInstanceOperation() throws Exception { System.out.println( "START TestInstanceOperation.testAddingNodeWithSwapOutInstanceOperation() at " + new Date( System.currentTimeMillis())); enabledTopologyAwareRebalance(); - resetInstances(); + removeOfflineOrDisabledOrSwapInInstances(); // Set instance's InstanceOperation to SWAP_OUT String instanceToSwapOutName = _participants.get(0).getInstanceName(); @@ -531,7 +358,7 @@ public void testAddingNodeWithSwapOutInstanceOperation() throws Exception { InstanceConstants.InstanceOperation.SWAP_OUT); // Add instance with InstanceOperation set to SWAP_IN - String instanceToSwapInName = PARTICIPANT_PREFIX + "_" + (START_PORT + _participants.size()); + String instanceToSwapInName = PARTICIPANT_PREFIX + "_" + _nextStartPort; addParticipant(instanceToSwapInName, instanceToSwapOutInstanceConfig.getLogicalId(LOGICAL_ID), instanceToSwapOutInstanceConfig.getDomainAsMap().get(ZONE), InstanceConstants.InstanceOperation.SWAP_OUT, true, -1); @@ -543,7 +370,7 @@ public void testAddingNodeWithSwapOutNodeInstanceOperationUnset() throws Excepti "START TestInstanceOperation.testAddingNodeWithSwapOutNodeInstanceOperationUnset() at " + new Date(System.currentTimeMillis())); - resetInstances(); + removeOfflineOrDisabledOrSwapInInstances(); // Set instance's 
InstanceOperation to null String instanceToSwapOutName = _participants.get(0).getInstanceName(); @@ -553,7 +380,7 @@ public void testAddingNodeWithSwapOutNodeInstanceOperationUnset() throws Excepti .setInstanceOperation(CLUSTER_NAME, instanceToSwapOutName, null); // Add instance with InstanceOperation set to SWAP_IN - String instanceToSwapInName = PARTICIPANT_PREFIX + "_" + (START_PORT + _participants.size()); + String instanceToSwapInName = PARTICIPANT_PREFIX + "_" + _nextStartPort; addParticipant(instanceToSwapInName, instanceToSwapOutInstanceConfig.getLogicalId(LOGICAL_ID), instanceToSwapOutInstanceConfig.getDomainAsMap().get(ZONE), InstanceConstants.InstanceOperation.SWAP_IN, true, -1); @@ -564,10 +391,10 @@ public void testNodeSwapWithNoSwapOutNode() throws Exception { System.out.println("START TestInstanceOperation.testNodeSwapWithNoSwapOutNode() at " + new Date( System.currentTimeMillis())); - resetInstances(); + removeOfflineOrDisabledOrSwapInInstances(); // Add new instance with InstanceOperation set to SWAP_IN - String instanceToSwapInName = PARTICIPANT_PREFIX + "_" + (START_PORT + _participants.size()); + String instanceToSwapInName = PARTICIPANT_PREFIX + "_" + _nextStartPort; addParticipant(instanceToSwapInName, "1000", "zone_1000", InstanceConstants.InstanceOperation.SWAP_IN, true, -1); } @@ -578,7 +405,7 @@ public void testNodeSwapSwapInNodeNoInstanceOperationEnabled() throws Exception "START TestInstanceOperation.testNodeSwapSwapInNodeNoInstanceOperationEnabled() at " + new Date(System.currentTimeMillis())); - resetInstances(); + removeOfflineOrDisabledOrSwapInInstances(); // Set instance's InstanceOperation to SWAP_OUT String instanceToSwapOutName = _participants.get(0).getInstanceName(); @@ -590,9 +417,14 @@ public void testNodeSwapSwapInNodeNoInstanceOperationEnabled() throws Exception // Add instance with same logicalId with InstanceOperation unset // This should work because adding instance with InstanceOperation unset will automatically // set 
the InstanceOperation to SWAP_IN. - String instanceToSwapInName = PARTICIPANT_PREFIX + "_" + (START_PORT + _participants.size()); + String instanceToSwapInName = PARTICIPANT_PREFIX + "_" + _nextStartPort; addParticipant(instanceToSwapInName, instanceToSwapOutInstanceConfig.getLogicalId(LOGICAL_ID), instanceToSwapOutInstanceConfig.getDomainAsMap().get(ZONE), null, true, -1); + + Assert.assertTrue(_bestPossibleClusterVerifier.verifyByPolling()); + Assert.assertTrue(_gSetupTool.getClusterManagementTool() + .completeSwapIfPossible(CLUSTER_NAME, instanceToSwapOutName)); + Assert.assertTrue(_clusterVerifier.verifyByPolling()); } @Test(expectedExceptions = HelixException.class, dependsOnMethods = "testNodeSwapSwapInNodeNoInstanceOperationEnabled") @@ -601,7 +433,7 @@ public void testNodeSwapSwapInNodeWithAlreadySwappingPair() throws Exception { "START TestInstanceOperation.testNodeSwapSwapInNodeWithAlreadySwappingPair() at " + new Date(System.currentTimeMillis())); - resetInstances(); + removeOfflineOrDisabledOrSwapInInstances(); // Set instance's InstanceOperation to SWAP_OUT String instanceToSwapOutName = _participants.get(0).getInstanceName(); @@ -611,15 +443,14 @@ public void testNodeSwapSwapInNodeWithAlreadySwappingPair() throws Exception { InstanceConstants.InstanceOperation.SWAP_OUT); // Add instance with InstanceOperation set to SWAP_IN - String instanceToSwapInName = PARTICIPANT_PREFIX + "_" + (START_PORT + _participants.size()); + String instanceToSwapInName = PARTICIPANT_PREFIX + "_" + _nextStartPort; addParticipant(instanceToSwapInName, instanceToSwapOutInstanceConfig.getLogicalId(LOGICAL_ID), instanceToSwapOutInstanceConfig.getDomainAsMap().get(ZONE), InstanceConstants.InstanceOperation.SWAP_IN, true, -1); // Add another instance with InstanceOperation set to SWAP_IN with same logicalId as previously // added SWAP_IN instance. 
- String secondInstanceToSwapInName = - PARTICIPANT_PREFIX + "_" + (START_PORT + _participants.size()); + String secondInstanceToSwapInName = PARTICIPANT_PREFIX + "_" + _nextStartPort; addParticipant(secondInstanceToSwapInName, instanceToSwapOutInstanceConfig.getLogicalId(LOGICAL_ID), instanceToSwapOutInstanceConfig.getDomainAsMap().get(ZONE), @@ -627,35 +458,10 @@ public void testNodeSwapSwapInNodeWithAlreadySwappingPair() throws Exception { } @Test(expectedExceptions = HelixException.class, dependsOnMethods = "testNodeSwapSwapInNodeWithAlreadySwappingPair") - public void testNodeSwapNoTopologySetup() throws Exception { - System.out.println("START TestInstanceOperation.testNodeSwapNoTopologySetup() at " + new Date( - System.currentTimeMillis())); - disableTopologyAwareRebalance(); - resetInstances(); - - // Set instance's InstanceOperation to SWAP_OUT - String instanceToSwapOutName = _participants.get(0).getInstanceName(); - _gSetupTool.getClusterManagementTool().setInstanceOperation(CLUSTER_NAME, instanceToSwapOutName, - InstanceConstants.InstanceOperation.SWAP_OUT); - - // Add instance with InstanceOperation set to SWAP_IN - // There should be an error that the logicalId does not have SWAP_OUT instance because, - // helix can't determine what topology key to use to get the logicalId if TOPOLOGY is not set. 
- String instanceToSwapInName = PARTICIPANT_PREFIX + "_" + (START_PORT + _participants.size()); - InstanceConfig instanceToSwapOutInstanceConfig = _gSetupTool.getClusterManagementTool() - .getInstanceConfig(CLUSTER_NAME, instanceToSwapOutName); - addParticipant(instanceToSwapInName, instanceToSwapOutInstanceConfig.getLogicalId(LOGICAL_ID), - instanceToSwapOutInstanceConfig.getDomainAsMap().get(ZONE), - InstanceConstants.InstanceOperation.SWAP_IN, true, -1); - } - - @Test(expectedExceptions = HelixException.class, dependsOnMethods = "testNodeSwapNoTopologySetup") public void testNodeSwapWrongFaultZone() throws Exception { System.out.println("START TestInstanceOperation.testNodeSwapWrongFaultZone() at " + new Date( System.currentTimeMillis())); - // Re-enable topology aware rebalancing and set TOPOLOGY. - enabledTopologyAwareRebalance(); - resetInstances(); + removeOfflineOrDisabledOrSwapInInstances(); // Set instance's InstanceOperation to SWAP_OUT String instanceToSwapOutName = _participants.get(0).getInstanceName(); @@ -664,7 +470,7 @@ public void testNodeSwapWrongFaultZone() throws Exception { // Add instance with InstanceOperation set to SWAP_IN // There should be an error because SWAP_IN instance must be in the same FAULT_ZONE as the SWAP_OUT instance. 
- String instanceToSwapInName = PARTICIPANT_PREFIX + "_" + (START_PORT + _participants.size()); + String instanceToSwapInName = PARTICIPANT_PREFIX + "_" + _nextStartPort; InstanceConfig instanceToSwapOutInstanceConfig = _gSetupTool.getClusterManagementTool() .getInstanceConfig(CLUSTER_NAME, instanceToSwapOutName); addParticipant(instanceToSwapInName, instanceToSwapOutInstanceConfig.getLogicalId(LOGICAL_ID), @@ -676,7 +482,7 @@ public void testNodeSwapWrongFaultZone() throws Exception { public void testNodeSwapWrongCapacity() throws Exception { System.out.println("START TestInstanceOperation.testNodeSwapWrongCapacity() at " + new Date( System.currentTimeMillis())); - resetInstances(); + removeOfflineOrDisabledOrSwapInInstances(); // Set instance's InstanceOperation to SWAP_OUT String instanceToSwapOutName = _participants.get(0).getInstanceName(); @@ -685,7 +491,7 @@ public void testNodeSwapWrongCapacity() throws Exception { // Add instance with InstanceOperation set to SWAP_IN // There should be an error because SWAP_IN instance must have same capacity as the SWAP_OUT node. 
- String instanceToSwapInName = PARTICIPANT_PREFIX + "_" + (START_PORT + _participants.size()); + String instanceToSwapInName = PARTICIPANT_PREFIX + "_" + _nextStartPort; InstanceConfig instanceToSwapOutInstanceConfig = _gSetupTool.getClusterManagementTool() .getInstanceConfig(CLUSTER_NAME, instanceToSwapOutName); addParticipant(instanceToSwapInName, instanceToSwapOutInstanceConfig.getLogicalId(LOGICAL_ID), @@ -697,7 +503,7 @@ public void testNodeSwapWrongCapacity() throws Exception { public void testNodeSwap() throws Exception { System.out.println( "START TestInstanceOperation.testNodeSwap() at " + new Date(System.currentTimeMillis())); - resetInstances(); + removeOfflineOrDisabledOrSwapInInstances(); // Store original EV Map originalEVs = getEVs(); @@ -717,7 +523,7 @@ public void testNodeSwap() throws Exception { Collections.emptySet(), Collections.emptySet()); // Add instance with InstanceOperation set to SWAP_IN - String instanceToSwapInName = PARTICIPANT_PREFIX + "_" + (START_PORT + _participants.size()); + String instanceToSwapInName = PARTICIPANT_PREFIX + "_" + _nextStartPort; swapOutInstancesToSwapInInstances.put(instanceToSwapOutName, instanceToSwapInName); addParticipant(instanceToSwapInName, instanceToSwapOutInstanceConfig.getLogicalId(LOGICAL_ID), instanceToSwapOutInstanceConfig.getDomainAsMap().get(ZONE), @@ -752,13 +558,11 @@ public void testNodeSwap() throws Exception { // Assert that SWAP_OUT instance is disabled and has no partitions assigned to it. Assert.assertFalse(_gSetupTool.getClusterManagementTool() .getInstanceConfig(CLUSTER_NAME, instanceToSwapOutName).getInstanceEnabled()); - Assert.assertEquals(getPartitionsAndStatesOnInstance(getEVs(), instanceToSwapOutName).size(), - 0); // Validate that the SWAP_IN instance has the same partitions the SWAP_OUT instance had before // swap was completed. 
- validateEVsCorrect(getEVs(), originalEVs, swapOutInstancesToSwapInInstances, - Collections.emptySet(), Set.of(instanceToSwapInName)); + verifier(() -> (validateEVsCorrect(getEVs(), originalEVs, swapOutInstancesToSwapInInstances, + Collections.emptySet(), Set.of(instanceToSwapInName))), TIMEOUT); } @Test(dependsOnMethods = "testNodeSwap") @@ -767,7 +571,7 @@ public void testNodeSwapSwapInNodeNoInstanceOperationDisabled() throws Exception "START TestInstanceOperation.testNodeSwapSwapInNodeNoInstanceOperationDisabled() at " + new Date(System.currentTimeMillis())); - resetInstances(); + removeOfflineOrDisabledOrSwapInInstances(); // Store original EVs Map originalEVs = getEVs(); @@ -787,7 +591,7 @@ public void testNodeSwapSwapInNodeNoInstanceOperationDisabled() throws Exception Collections.emptySet(), Collections.emptySet()); // Add instance with InstanceOperation unset, should automatically be set to SWAP_IN - String instanceToSwapInName = PARTICIPANT_PREFIX + "_" + (START_PORT + _participants.size()); + String instanceToSwapInName = PARTICIPANT_PREFIX + "_" + _nextStartPort; swapOutInstancesToSwapInInstances.put(instanceToSwapOutName, instanceToSwapInName); addParticipant(instanceToSwapInName, instanceToSwapOutInstanceConfig.getLogicalId(LOGICAL_ID), instanceToSwapOutInstanceConfig.getDomainAsMap().get(ZONE), null, false, -1); @@ -823,13 +627,11 @@ public void testNodeSwapSwapInNodeNoInstanceOperationDisabled() throws Exception // Assert that SWAP_OUT instance is disabled and has no partitions assigned to it. Assert.assertFalse(_gSetupTool.getClusterManagementTool() .getInstanceConfig(CLUSTER_NAME, instanceToSwapOutName).getInstanceEnabled()); - Assert.assertEquals(getPartitionsAndStatesOnInstance(getEVs(), instanceToSwapOutName).size(), - 0); // Validate that the SWAP_IN instance has the same partitions the SWAP_OUT instance had before // swap was completed. 
- validateEVsCorrect(getEVs(), originalEVs, swapOutInstancesToSwapInInstances, - Collections.emptySet(), Set.of(instanceToSwapInName)); + verifier(() -> (validateEVsCorrect(getEVs(), originalEVs, swapOutInstancesToSwapInInstances, + Collections.emptySet(), Set.of(instanceToSwapInName))), TIMEOUT); } @Test(dependsOnMethods = "testNodeSwapSwapInNodeNoInstanceOperationDisabled") @@ -838,7 +640,7 @@ public void testNodeSwapCancelSwapWhenReadyToComplete() throws Exception { "START TestInstanceOperation.testNodeSwapCancelSwapWhenReadyToComplete() at " + new Date( System.currentTimeMillis())); - resetInstances(); + removeOfflineOrDisabledOrSwapInInstances(); // Store original EVs Map originalEVs = getEVs(); @@ -853,12 +655,12 @@ public void testNodeSwapCancelSwapWhenReadyToComplete() throws Exception { InstanceConstants.InstanceOperation.SWAP_OUT); // Validate that the assignment has not changed since setting the InstanceOperation to SWAP_OUT - Assert.assertTrue(_bestPossibleClusterVerifier.verifyByPolling()); + Assert.assertTrue(_clusterVerifier.verifyByPolling()); validateEVsCorrect(getEVs(), originalEVs, swapOutInstancesToSwapInInstances, Collections.emptySet(), Collections.emptySet()); // Add instance with InstanceOperation set to SWAP_IN - String instanceToSwapInName = PARTICIPANT_PREFIX + "_" + (START_PORT + _participants.size()); + String instanceToSwapInName = PARTICIPANT_PREFIX + "_" + _nextStartPort; swapOutInstancesToSwapInInstances.put(instanceToSwapOutName, instanceToSwapInName); addParticipant(instanceToSwapInName, instanceToSwapOutInstanceConfig.getLogicalId(LOGICAL_ID), instanceToSwapOutInstanceConfig.getDomainAsMap().get(ZONE), @@ -903,12 +705,9 @@ public void testNodeSwapCancelSwapWhenReadyToComplete() throws Exception { Assert.assertTrue(_clusterVerifier.verifyByPolling()); - // Validate there are no partitions on the SWAP_IN instance. 
- Assert.assertEquals(getPartitionsAndStatesOnInstance(getEVs(), instanceToSwapInName).size(), 0); - // Validate that the SWAP_OUT instance has the same partitions as it had before. - validateEVsCorrect(getEVs(), originalEVs, swapOutInstancesToSwapInInstances, - Collections.emptySet(), Collections.emptySet()); + verifier(() -> (validateEVsCorrect(getEVs(), originalEVs, swapOutInstancesToSwapInInstances, + Collections.emptySet(), Collections.emptySet())), TIMEOUT); } @Test(dependsOnMethods = "testNodeSwapCancelSwapWhenReadyToComplete") @@ -916,7 +715,7 @@ public void testNodeSwapAfterEMM() throws Exception { System.out.println("START TestInstanceOperation.testNodeSwapAfterEMM() at " + new Date( System.currentTimeMillis())); - resetInstances(); + removeOfflineOrDisabledOrSwapInInstances(); // Store original EVs Map originalEVs = getEVs(); @@ -940,7 +739,7 @@ public void testNodeSwapAfterEMM() throws Exception { Collections.emptySet(), Collections.emptySet()); // Add instance with InstanceOperation set to SWAP_IN - String instanceToSwapInName = PARTICIPANT_PREFIX + "_" + (START_PORT + _participants.size()); + String instanceToSwapInName = PARTICIPANT_PREFIX + "_" + _nextStartPort; swapOutInstancesToSwapInInstances.put(instanceToSwapOutName, instanceToSwapInName); addParticipant(instanceToSwapInName, instanceToSwapOutInstanceConfig.getLogicalId(LOGICAL_ID), instanceToSwapOutInstanceConfig.getDomainAsMap().get(ZONE), @@ -983,13 +782,11 @@ public void testNodeSwapAfterEMM() throws Exception { // Assert that SWAP_OUT instance is disabled and has no partitions assigned to it. Assert.assertFalse(_gSetupTool.getClusterManagementTool() .getInstanceConfig(CLUSTER_NAME, instanceToSwapOutName).getInstanceEnabled()); - Assert.assertEquals(getPartitionsAndStatesOnInstance(getEVs(), instanceToSwapOutName).size(), - 0); // Validate that the SWAP_IN instance has the same partitions the SWAP_OUT instance had before // swap was completed. 
- validateEVsCorrect(getEVs(), originalEVs, swapOutInstancesToSwapInInstances, - Collections.emptySet(), Set.of(instanceToSwapInName)); + verifier(() -> (validateEVsCorrect(getEVs(), originalEVs, swapOutInstancesToSwapInInstances, + Collections.emptySet(), Set.of(instanceToSwapInName))), TIMEOUT); } @Test(dependsOnMethods = "testNodeSwapAfterEMM") @@ -998,7 +795,7 @@ public void testNodeSwapWithSwapOutInstanceDisabled() throws Exception { "START TestInstanceOperation.testNodeSwapWithSwapOutInstanceDisabled() at " + new Date( System.currentTimeMillis())); - resetInstances(); + removeOfflineOrDisabledOrSwapInInstances(); // Store original EVs Map originalEVs = getEVs(); @@ -1026,7 +823,7 @@ public void testNodeSwapWithSwapOutInstanceDisabled() throws Exception { Assert.assertTrue(swapOutInstanceOfflineStates.contains("OFFLINE")); // Add instance with InstanceOperation set to SWAP_IN - String instanceToSwapInName = PARTICIPANT_PREFIX + "_" + (START_PORT + _participants.size()); + String instanceToSwapInName = PARTICIPANT_PREFIX + "_" + _nextStartPort; addParticipant(instanceToSwapInName, instanceToSwapOutInstanceConfig.getLogicalId(LOGICAL_ID), instanceToSwapOutInstanceConfig.getDomainAsMap().get(ZONE), InstanceConstants.InstanceOperation.SWAP_IN, true, -1); @@ -1067,8 +864,10 @@ public void testNodeSwapWithSwapOutInstanceDisabled() throws Exception { // Assert that SWAP_OUT instance is disabled and has no partitions assigned to it. 
Assert.assertFalse(_gSetupTool.getClusterManagementTool() .getInstanceConfig(CLUSTER_NAME, instanceToSwapOutName).getInstanceEnabled()); - Assert.assertEquals(getPartitionsAndStatesOnInstance(getEVs(), instanceToSwapOutName).size(), - 0); + + verifier( + () -> (getPartitionsAndStatesOnInstance(getEVs(), instanceToSwapOutName).isEmpty()), + TIMEOUT); } @Test(expectedExceptions = HelixException.class, dependsOnMethods = "testNodeSwapWithSwapOutInstanceDisabled") @@ -1076,7 +875,7 @@ public void testNodeSwapAddSwapInFirstEnabledBeforeSwapOutSet() { System.out.println( "START TestInstanceOperation.testNodeSwapAddSwapInFirstEnabledBeforeSwapOutSet() at " + new Date(System.currentTimeMillis())); - resetInstances(); + removeOfflineOrDisabledOrSwapInInstances(); // Get the SWAP_OUT instance. String instanceToSwapOutName = _participants.get(0).getInstanceName(); @@ -1084,7 +883,7 @@ public void testNodeSwapAddSwapInFirstEnabledBeforeSwapOutSet() { .getInstanceConfig(CLUSTER_NAME, instanceToSwapOutName); // Add instance with InstanceOperation set to SWAP_IN enabled before setting SWAP_OUT instance. - String instanceToSwapInName = PARTICIPANT_PREFIX + "_" + (START_PORT + _participants.size()); + String instanceToSwapInName = PARTICIPANT_PREFIX + "_" + _nextStartPort; addParticipant(instanceToSwapInName, instanceToSwapOutInstanceConfig.getLogicalId(LOGICAL_ID), instanceToSwapOutInstanceConfig.getDomainAsMap().get(ZONE), null, true, -1); } @@ -1094,7 +893,7 @@ public void testNodeSwapAddSwapInFirstEnableBeforeSwapOutSet() { System.out.println( "START TestInstanceOperation.testNodeSwapAddSwapInFirstEnableBeforeSwapOutSet() at " + new Date(System.currentTimeMillis())); - resetInstances(); + removeOfflineOrDisabledOrSwapInInstances(); // Get the SWAP_OUT instance. 
String instanceToSwapOutName = _participants.get(0).getInstanceName(); @@ -1102,7 +901,7 @@ public void testNodeSwapAddSwapInFirstEnableBeforeSwapOutSet() { .getInstanceConfig(CLUSTER_NAME, instanceToSwapOutName); // Add instance with InstanceOperation set to SWAP_IN - String instanceToSwapInName = PARTICIPANT_PREFIX + "_" + (START_PORT + _participants.size()); + String instanceToSwapInName = PARTICIPANT_PREFIX + "_" + _nextStartPort; addParticipant(instanceToSwapInName, instanceToSwapOutInstanceConfig.getLogicalId(LOGICAL_ID), instanceToSwapOutInstanceConfig.getDomainAsMap().get(ZONE), null, false, -1); @@ -1117,7 +916,7 @@ public void testUnsetInstanceOperationOnSwapInWhenAlreadyUnsetOnSwapOut() { System.out.println( "START TestInstanceOperation.testUnsetInstanceOperationOnSwapInWhenAlreadyUnsetOnSwapOut() at " + new Date(System.currentTimeMillis())); - resetInstances(); + removeOfflineOrDisabledOrSwapInInstances(); // Get the SWAP_OUT instance. String instanceToSwapOutName = _participants.get(0).getInstanceName(); @@ -1125,7 +924,7 @@ public void testUnsetInstanceOperationOnSwapInWhenAlreadyUnsetOnSwapOut() { .getInstanceConfig(CLUSTER_NAME, instanceToSwapOutName); // Add instance with InstanceOperation set to SWAP_IN - String instanceToSwapInName = PARTICIPANT_PREFIX + "_" + (START_PORT + _participants.size()); + String instanceToSwapInName = PARTICIPANT_PREFIX + "_" + _nextStartPort; addParticipant(instanceToSwapInName, instanceToSwapOutInstanceConfig.getLogicalId(LOGICAL_ID), instanceToSwapOutInstanceConfig.getDomainAsMap().get(ZONE), null, false, -1); @@ -1139,10 +938,10 @@ public void testUnsetInstanceOperationOnSwapInWhenAlreadyUnsetOnSwapOut() { } @Test(dependsOnMethods = "testUnsetInstanceOperationOnSwapInWhenAlreadyUnsetOnSwapOut") - public void testNodeSwapAddSwapInFirst() { + public void testNodeSwapAddSwapInFirst() throws Exception { System.out.println("START TestInstanceOperation.testNodeSwapAddSwapInFirst() at " + new Date( 
System.currentTimeMillis())); - resetInstances(); + removeOfflineOrDisabledOrSwapInInstances(); // Store original EV Map originalEVs = getEVs(); @@ -1155,13 +954,13 @@ public void testNodeSwapAddSwapInFirst() { .getInstanceConfig(CLUSTER_NAME, instanceToSwapOutName); // Add instance with InstanceOperation set to SWAP_IN - String instanceToSwapInName = PARTICIPANT_PREFIX + "_" + (START_PORT + _participants.size()); + String instanceToSwapInName = PARTICIPANT_PREFIX + "_" + _nextStartPort; swapOutInstancesToSwapInInstances.put(instanceToSwapOutName, instanceToSwapInName); addParticipant(instanceToSwapInName, instanceToSwapOutInstanceConfig.getLogicalId(LOGICAL_ID), instanceToSwapOutInstanceConfig.getDomainAsMap().get(ZONE), null, false, -1); // Validate that the assignment has not changed since setting the InstanceOperation to SWAP_OUT - Assert.assertTrue(_clusterVerifier.verifyByPolling()); + Assert.assertTrue(_bestPossibleClusterVerifier.verifyByPolling()); validateEVsCorrect(getEVs(), originalEVs, swapOutInstancesToSwapInInstances, Collections.emptySet(), Collections.emptySet()); @@ -1201,13 +1000,234 @@ public void testNodeSwapAddSwapInFirst() { // Assert that SWAP_OUT instance is disabled and has no partitions assigned to it. Assert.assertFalse(_gSetupTool.getClusterManagementTool() .getInstanceConfig(CLUSTER_NAME, instanceToSwapOutName).getInstanceEnabled()); - Assert.assertEquals(getPartitionsAndStatesOnInstance(getEVs(), instanceToSwapOutName).size(), - 0); // Validate that the SWAP_IN instance has the same partitions the SWAP_OUT instance had before // swap was completed. 
- validateEVsCorrect(getEVs(), originalEVs, swapOutInstancesToSwapInInstances, - Collections.emptySet(), Set.of(instanceToSwapInName)); + verifier(() -> (validateEVsCorrect(getEVs(), originalEVs, swapOutInstancesToSwapInInstances, + Collections.emptySet(), Set.of(instanceToSwapInName))), TIMEOUT); + } + + @Test(dependsOnMethods = "testNodeSwapAddSwapInFirst") + public void testEvacuateAndCancelBeforeBootstrapFinish() throws Exception { + System.out.println( + "START TestInstanceOperation.testEvacuateAndCancelBeforeBootstrapFinish() at " + new Date( + System.currentTimeMillis())); + removeOfflineOrDisabledOrSwapInInstances(); + + // add a resource where downward state transition is slow + createResourceWithDelayedRebalance(CLUSTER_NAME, "TEST_DB3_DELAYED_CRUSHED", "MasterSlave", + PARTITIONS, REPLICA, REPLICA - 1, 200000, CrushEdRebalanceStrategy.class.getName()); + _allDBs.add("TEST_DB3_DELAYED_CRUSHED"); + // add a resource where downward state transition is slow + createResourceWithWagedRebalance(CLUSTER_NAME, "TEST_DB4_DELAYED_WAGED", "MasterSlave", + PARTITIONS, REPLICA, REPLICA - 1); + _allDBs.add("TEST_DB4_DELAYED_WAGED"); + // wait for assignment to finish + Assert.assertTrue(_clusterVerifier.verifyByPolling()); + + // set bootstrap ST delay to a large number + _stateModelDelay = -10000L; + // evacuate an instance + String instanceToEvacuate = _participants.get(0).getInstanceName(); + _gSetupTool.getClusterManagementTool().setInstanceOperation(CLUSTER_NAME, instanceToEvacuate, + InstanceConstants.InstanceOperation.EVACUATE); + // Messages should be pending at all instances besides the evacuate one + for (String participant : _participantNames) { + if (participant.equals(instanceToEvacuate)) { + continue; + } + verifier(() -> ((_dataAccessor.getChildNames( + _dataAccessor.keyBuilder().messages(participant))).isEmpty()), 30000); + } + Assert.assertFalse(_admin.isEvacuateFinished(CLUSTER_NAME, instanceToEvacuate)); + 
Assert.assertFalse(_admin.isReadyForPreparingJoiningCluster(CLUSTER_NAME, instanceToEvacuate)); + + // sleep a bit so ST messages can start executing + Thread.sleep(Math.abs(_stateModelDelay / 100)); + // before we cancel, check current EV + Map assignment = getEVs(); + for (String resource : _allDBs) { + // check every replica has >= 3 partitions and a top state partition + validateAssignmentInEv(assignment.get(resource)); + } + + // cancel the evacuation + _gSetupTool.getClusterManagementTool() + .setInstanceOperation(CLUSTER_NAME, instanceToEvacuate, null); + + assignment = getEVs(); + for (String resource : _allDBs) { + // check every replica has >= 3 active replicas, even before cluster converge + validateAssignmentInEv(assignment.get(resource)); + } + + // check cluster converge. We have longer delay for ST then verifier timeout. It will only converge if we cancel ST. + Assert.assertTrue(_clusterVerifier.verifyByPolling()); + + // EV should contain all participants, check resources one by one + assignment = getEVs(); + for (String resource : _allDBs) { + Assert.assertTrue( + getParticipantsInEv(assignment.get(resource)).containsAll(_participantNames)); + // check every replica has >= 3 active replicas again + validateAssignmentInEv(assignment.get(resource)); + } + } + + @Test(dependsOnMethods = "testEvacuateAndCancelBeforeBootstrapFinish") + public void testEvacuateAndCancelBeforeDropFinish() throws Exception { + System.out.println( + "START TestInstanceOperation.testEvacuateAndCancelBeforeDropFinish() at " + new Date( + System.currentTimeMillis())); + + // set DROP ST delay to a large number + _stateModelDelay = 10000L; + + // evacuate an instance + String instanceToEvacuate = _participants.get(0).getInstanceName(); + _gSetupTool.getClusterManagementTool().setInstanceOperation(CLUSTER_NAME, instanceToEvacuate, + InstanceConstants.InstanceOperation.EVACUATE); + + // message should be pending at the to evacuate participant + verifier(() -> 
((_dataAccessor.getChildNames( + _dataAccessor.keyBuilder().messages(instanceToEvacuate))).isEmpty()), 30000); + Assert.assertFalse(_admin.isEvacuateFinished(CLUSTER_NAME, instanceToEvacuate)); + + // cancel evacuation + _gSetupTool.getClusterManagementTool() + .setInstanceOperation(CLUSTER_NAME, instanceToEvacuate, null); + // check every replica has >= 3 active replicas, even before cluster converge + Map assignment = getEVs(); + for (String resource : _allDBs) { + validateAssignmentInEv(assignment.get(resource)); + } + + Assert.assertTrue(_clusterVerifier.verifyByPolling()); + + // EV should contain all participants, check resources one by one + assignment = getEVs(); + for (String resource : _allDBs) { + Assert.assertTrue( + getParticipantsInEv(assignment.get(resource)).containsAll(_participantNames)); + // check every replica has >= 3 active replicas + validateAssignmentInEv(assignment.get(resource)); + } + } + + @Test(dependsOnMethods = "testEvacuateAndCancelBeforeDropFinish") + public void testMarkEvacuationAfterEMM() throws Exception { + System.out.println("START TestInstanceOperation.testMarkEvacuationAfterEMM() at " + new Date( + System.currentTimeMillis())); + _stateModelDelay = 1000L; + Assert.assertFalse(_gSetupTool.getClusterManagementTool().isInMaintenanceMode(CLUSTER_NAME)); + _gSetupTool.getClusterManagementTool() + .manuallyEnableMaintenanceMode(CLUSTER_NAME, true, null, null); + String newParticipantName = PARTICIPANT_PREFIX + "_" + _nextStartPort; + addParticipant(newParticipantName); + + Assert.assertTrue(_clusterVerifier.verifyByPolling()); + Map assignment = getEVs(); + for (String resource : _allDBs) { + Assert.assertFalse( + getParticipantsInEv(assignment.get(resource)).contains(newParticipantName)); + } + + // set evacuate operation + String instanceToEvacuate = _participants.get(0).getInstanceName(); + _gSetupTool.getClusterManagementTool().setInstanceOperation(CLUSTER_NAME, instanceToEvacuate, + 
InstanceConstants.InstanceOperation.EVACUATE); + + // there should be no evacuation happening + for (String resource : _allDBs) { + Assert.assertTrue(getParticipantsInEv(assignment.get(resource)).contains(instanceToEvacuate)); + } + + Assert.assertTrue(_clusterVerifier.verifyByPolling()); + + // exit MM + _gSetupTool.getClusterManagementTool() + .manuallyEnableMaintenanceMode(CLUSTER_NAME, false, null, null); + + Assert.assertTrue(_clusterVerifier.verifyByPolling()); + + assignment = getEVs(); + List currentActiveInstances = + _participantNames.stream().filter(n -> !n.equals(instanceToEvacuate)) + .collect(Collectors.toList()); + for (String resource : _allDBs) { + validateAssignmentInEv(assignment.get(resource)); + Set newPAssignedParticipants = getParticipantsInEv(assignment.get(resource)); + Assert.assertFalse(newPAssignedParticipants.contains(instanceToEvacuate)); + Assert.assertTrue(newPAssignedParticipants.containsAll(currentActiveInstances)); + } + Assert.assertTrue(_admin.isReadyForPreparingJoiningCluster(CLUSTER_NAME, instanceToEvacuate)); + + _stateModelDelay = 3L; + } + + @Test(dependsOnMethods = "testMarkEvacuationAfterEMM") + public void testEvacuationWithOfflineInstancesInCluster() throws Exception { + System.out.println( + "START TestInstanceOperation.testEvacuationWithOfflineInstancesInCluster() at " + new Date( + System.currentTimeMillis())); + _participants.get(1).syncStop(); + _participants.get(2).syncStop(); + + String evacuateInstanceName = _participants.get(_participants.size() - 2).getInstanceName(); + _gSetupTool.getClusterManagementTool().setInstanceOperation(CLUSTER_NAME, evacuateInstanceName, + InstanceConstants.InstanceOperation.EVACUATE); + + Map assignment; + // EV should contain all participants, check resources one by one + assignment = getEVs(); + for (String resource : _allDBs) { + verifier(() -> { + ExternalView ev = assignment.get(resource); + for (String partition : ev.getPartitionSet()) { + AtomicInteger activeReplicaCount = 
new AtomicInteger(); + ev.getStateMap(partition).values().stream().filter( + v -> v.equals("MASTER") || v.equals("LEADER") || v.equals("SLAVE") || v.equals( + "FOLLOWER") || v.equals("STANDBY")) + .forEach(v -> activeReplicaCount.getAndIncrement()); + if (activeReplicaCount.get() < REPLICA - 1 || ( + ev.getStateMap(partition).containsKey(evacuateInstanceName) && ev.getStateMap( + partition).get(evacuateInstanceName).equals("MASTER") && ev.getStateMap(partition) + .get(evacuateInstanceName).equals("LEADER"))) { + return false; + } + } + return true; + }, 30000); + } + + removeOfflineOrDisabledOrSwapInInstances(); + addParticipant(PARTICIPANT_PREFIX + "_" + _nextStartPort); + addParticipant(PARTICIPANT_PREFIX + "_" + _nextStartPort); + dropTestDBs(ImmutableSet.of("TEST_DB3_DELAYED_CRUSHED", "TEST_DB4_DELAYED_WAGED")); + } + + /** + * Verifies that the given verifier returns true within the given timeout. Handles AssertionError + * by returning false, which TestHelper.verify will not do. Asserts that return value from + * TestHelper.verify is true. 
+ * + * @param verifier the verifier to run + * @param timeout the timeout to wait for the verifier to return true + * @throws Exception if TestHelper.verify throws an exception + */ + private static void verifier(TestHelper.Verifier verifier, long timeout) throws Exception { + Assert.assertTrue(TestHelper.verify(() -> { + try { + boolean result = verifier.verify(); + if (!result) { + LOG.error("Verifier returned false, retrying..."); + } + return result; + } catch (AssertionError e) { + LOG.error("Caught AssertionError on verifier attempt: ", e); + return false; + } + }, timeout)); } private MockParticipantManager createParticipant(String participantName) { @@ -1237,6 +1257,7 @@ private void addParticipant(String participantName, String logicalId, String zon participant.syncStart(); _participants.add(participant); _participantNames.add(participantName); + _nextStartPort++; } private void addParticipant(String participantName) { @@ -1257,10 +1278,10 @@ private void createTestDBs(long delayTime) throws InterruptedException { PARTITIONS, REPLICA, REPLICA - 1); _allDBs.add("TEST_DB2_WAGED"); - Assert.assertTrue(_clusterVerifier.verifyByPolling()); + Assert.assertTrue(_clusterVerifier.verifyByPolling()); } - private void dropTestDBs(Set dbs) { + private void dropTestDBs(Set dbs) throws Exception { for (String db : dbs) { _gSetupTool.getClusterManagementTool().dropResource(CLUSTER_NAME, db); _allDBs.remove(db); @@ -1394,7 +1415,7 @@ private void validateEVCorrect(ExternalView actual, ExternalView original, } } - private void validateEVsCorrect(Map actuals, + private boolean validateEVsCorrect(Map actuals, Map originals, Map swapOutInstancesToSwapInInstances, Set inFlightSwapInInstances, Set completedSwapInInstanceNames) { Assert.assertEquals(actuals.keySet(), originals.keySet()); @@ -1402,6 +1423,7 @@ private void validateEVsCorrect(Map actuals, validateEVCorrect(actuals.get(resource), originals.get(resource), swapOutInstancesToSwapInInstances, inFlightSwapInInstances, 
completedSwapInInstanceNames); } + return true; } private void validateAssignmentInEv(ExternalView ev) { @@ -1461,8 +1483,8 @@ public StDelayMSStateModel() { private void sleepWhileNotCanceled(long sleepTime) throws InterruptedException{ while(sleepTime >0 && !isCancelled()) { - Thread.sleep(5000); - sleepTime = sleepTime - 5000; + Thread.sleep(TIMEOUT); + sleepTime = sleepTime - TIMEOUT; } if (isCancelled()) { _cancelled = false;