diff --git a/helix-core/src/main/java/org/apache/helix/util/InstanceValidationUtil.java b/helix-core/src/main/java/org/apache/helix/util/InstanceValidationUtil.java index 5f179e784e..6c542ab7bd 100644 --- a/helix-core/src/main/java/org/apache/helix/util/InstanceValidationUtil.java +++ b/helix-core/src/main/java/org/apache/helix/util/InstanceValidationUtil.java @@ -32,6 +32,7 @@ import org.apache.helix.HelixDefinedState; import org.apache.helix.HelixException; import org.apache.helix.PropertyKey; +import org.apache.helix.constants.InstanceConstants; import org.apache.helix.model.ClusterConfig; import org.apache.helix.model.CurrentState; import org.apache.helix.model.ExternalView; @@ -239,6 +240,23 @@ public static boolean hasErrorPartitions(HelixDataAccessor dataAccessor, String return false; } + /** + * Checks if the specified instance is marked for an ongoing instance operation. Currently, + * this method only checks for evacuation. + * + * @param dataAccessor The accessor for retrieving Helix data properties. + * @param instanceName An instance to be evaluated. + * @return + */ + public static boolean isOperationSetForInstance(HelixDataAccessor dataAccessor, + String instanceName) { + PropertyKey.Builder propertyKeyBuilder = dataAccessor.keyBuilder(); + InstanceConfig instanceConfig = + dataAccessor.getProperty(propertyKeyBuilder.instanceConfig(instanceName)); + return InstanceConstants.InstanceOperation.EVACUATE.name() + .equals(instanceConfig.getInstanceOperation()); + } + /** * Get the problematic partitions on the to-be-stop instance * Requirement: @@ -295,12 +313,17 @@ public static Map> perPartitionHealthCheck(List new ArrayList<>()) @@ -451,9 +474,11 @@ public static boolean siblingNodesActiveReplicaCheck(HelixDataAccessor dataAcces if (stateByInstanceMap.containsKey(instanceName)) { int numHealthySiblings = 0; for (Map.Entry entry : stateByInstanceMap.entrySet()) { - if (!entry.getKey().equals(instanceName) && (toBeStoppedInstances == null - || !toBeStoppedInstances.contains(entry.getKey())) && !unhealthyStates.contains( - entry.getValue())) { + String siblingInstanceName = entry.getKey(); + if (!siblingInstanceName.equals(instanceName) && (toBeStoppedInstances == null + || !toBeStoppedInstances.contains(siblingInstanceName)) + && !unhealthyStates.contains(entry.getValue()) && !isOperationSetForInstance( + dataAccessor, siblingInstanceName)) { numHealthySiblings++; } } diff --git a/helix-core/src/test/java/org/apache/helix/util/TestInstanceValidationUtil.java b/helix-core/src/test/java/org/apache/helix/util/TestInstanceValidationUtil.java index aa1ba32290..1ffba3c3d4 100644 --- a/helix-core/src/test/java/org/apache/helix/util/TestInstanceValidationUtil.java +++ b/helix-core/src/test/java/org/apache/helix/util/TestInstanceValidationUtil.java @@ -33,6 +33,7 @@ import org.apache.helix.HelixException; import org.apache.helix.PropertyKey; import org.apache.helix.PropertyType; +import org.apache.helix.constants.InstanceConstants; import org.apache.helix.model.ClusterConfig; import org.apache.helix.model.CurrentState; import org.apache.helix.model.ExternalView; @@ -45,6 +46,7 @@ import org.testng.annotations.DataProvider; import org.testng.annotations.Test; +import static org.mockito.ArgumentMatchers.anyString; import static org.mockito.Matchers.eq; import static org.mockito.Mockito.any; import static org.mockito.Mockito.argThat; @@ -375,7 +377,7 @@ public void TestSiblingNodesActiveReplicaCheck_success() { String resource = "resource"; Mock mock = new Mock(); doReturn(ImmutableList.of(resource)).when(mock.dataAccessor) - .getChildNames(argThat(new PropertyKeyArgument(PropertyType.EXTERNALVIEW))); + .getChildNames(argThat(new PropertyKeyArgument(PropertyType.IDEALSTATES))); // set ideal state IdealState idealState = mock(IdealState.class); when(idealState.isEnabled()).thenReturn(true); @@ -397,6 +399,9 @@ public void TestSiblingNodesActiveReplicaCheck_success() { doReturn(stateModelDefinition).when(mock.dataAccessor) .getProperty(argThat(new PropertyKeyArgument(PropertyType.STATEMODELDEFS))); + // set up default instances config + setDefaultInstanceConfigs(mock); + boolean result = InstanceValidationUtil.siblingNodesActiveReplicaCheck(mock.dataAccessor, TEST_INSTANCE); @@ -430,6 +435,9 @@ public void TestSiblingNodesActiveReplicaCheckSuccessWithToBeStoppedInstances() doReturn(stateModelDefinition).when(mock.dataAccessor) .getProperty(argThat(new PropertyKeyArgument(PropertyType.STATEMODELDEFS))); + // set default instance config + setDefaultInstanceConfigs(mock); + Set toBeStoppedInstances = new HashSet<>(); toBeStoppedInstances.add("instance3"); toBeStoppedInstances.add("invalidInstances"); // include an invalid instance. @@ -469,6 +477,9 @@ public void TestSiblingNodesActiveReplicaCheckFailsWithToBeStoppedInstances() { doReturn(stateModelDefinition).when(mock.dataAccessor) .getProperty(argThat(new PropertyKeyArgument(PropertyType.STATEMODELDEFS))); + // set default instance config + setDefaultInstanceConfigs(mock); + Set toBeStoppedInstances = new HashSet<>(); toBeStoppedInstances.add("instance1"); toBeStoppedInstances.add("instance2"); @@ -478,6 +489,59 @@ public void TestSiblingNodesActiveReplicaCheckFailsWithToBeStoppedInstances() { Assert.assertFalse(result); } + @Test + public void TestSiblingNodesActiveReplicaCheckFailsWithEvacuatingInstance() { + String resource = "resource"; + Mock mock = new Mock(); + doReturn(ImmutableList.of(resource)).when(mock.dataAccessor) + .getChildNames(argThat(new PropertyKeyArgument(PropertyType.IDEALSTATES))); + // set ideal state + IdealState idealState = mock(IdealState.class); + when(idealState.isEnabled()).thenReturn(true); + when(idealState.isValid()).thenReturn(true); + when(idealState.getStateModelDefRef()).thenReturn("MasterSlave"); + doReturn(idealState).when(mock.dataAccessor).getProperty(argThat(new PropertyKeyArgument(PropertyType.IDEALSTATES))); + + // set external view + ExternalView externalView = mock(ExternalView.class); + when(externalView.getMinActiveReplicas()).thenReturn(2); + when(externalView.getStateModelDefRef()).thenReturn("MasterSlave"); + when(externalView.getPartitionSet()).thenReturn(ImmutableSet.of("db0")); + when(externalView.getStateMap("db0")).thenReturn(ImmutableMap.of(TEST_INSTANCE, "Master", + "instance1", "Slave", "instance2", "Slave", "instance3", "Slave")); + doReturn(externalView).when(mock.dataAccessor) + .getProperty(argThat(new PropertyKeyArgument(PropertyType.EXTERNALVIEW))); + StateModelDefinition stateModelDefinition = mock(StateModelDefinition.class); + when(stateModelDefinition.getInitialState()).thenReturn("OFFLINE"); + doReturn(stateModelDefinition).when(mock.dataAccessor) + .getProperty(argThat(new PropertyKeyArgument(PropertyType.STATEMODELDEFS))); + + // Set instance operation to be EVACUATE for instance2 + InstanceConfig instanceConfig2 = new InstanceConfig("instance2"); + InstanceConfig instanceConfig3 = new InstanceConfig("instance3"); + instanceConfig2.setInstanceOperation(InstanceConstants.InstanceOperation.EVACUATE); + doReturn(instanceConfig2).when(mock.dataAccessor) + .getProperty(BUILDER.instanceConfig("instance2")); + doReturn(instanceConfig3).when(mock.dataAccessor) + .getProperty(BUILDER.instanceConfig("instance3")); + + // Add instance1 to toBeStoppedInstances + Set toBeStoppedInstances = new HashSet<>(); + toBeStoppedInstances.add("instance1"); + boolean result = + InstanceValidationUtil.siblingNodesActiveReplicaCheck(mock.dataAccessor, TEST_INSTANCE, toBeStoppedInstances); + Assert.assertFalse(result); + + // Remove instance1 from toBeStoppedInstances and add its config + InstanceConfig instanceConfig1 = new InstanceConfig("instance1"); + doReturn(instanceConfig1).when(mock.dataAccessor) + .getProperty(BUILDER.instanceConfig("instance1")); + toBeStoppedInstances = new HashSet<>(); + result = + InstanceValidationUtil.siblingNodesActiveReplicaCheck(mock.dataAccessor, TEST_INSTANCE, toBeStoppedInstances); + Assert.assertTrue(result); + } + @Test public void TestSiblingNodesActiveReplicaCheck_fail() { String resource = "resource"; @@ -504,6 +568,9 @@ public void TestSiblingNodesActiveReplicaCheck_fail() { doReturn(stateModelDefinition).when(mock.dataAccessor) .getProperty(argThat(new PropertyKeyArgument(PropertyType.STATEMODELDEFS))); + // set default instance config + setDefaultInstanceConfigs(mock); + boolean result = InstanceValidationUtil.siblingNodesActiveReplicaCheck(mock.dataAccessor, TEST_INSTANCE); @@ -552,6 +619,19 @@ public void TestSiblingNodesActiveReplicaCheck_exception_whenExternalViewUnavail InstanceValidationUtil.siblingNodesActiveReplicaCheck(mock.dataAccessor, TEST_INSTANCE); } + private void setDefaultInstanceConfigs(Mock mock) { + // set default instance config + InstanceConfig instanceConfig1 = new InstanceConfig("instance1"); + doReturn(instanceConfig1).when(mock.dataAccessor) + .getProperty(BUILDER.instanceConfig("instance1")); + InstanceConfig instanceConfig2 = new InstanceConfig("instance2"); + doReturn(instanceConfig2).when(mock.dataAccessor) + .getProperty(BUILDER.instanceConfig("instance2")); + InstanceConfig instanceConfig3 = new InstanceConfig("instance3"); + doReturn(instanceConfig3).when(mock.dataAccessor) + .getProperty(BUILDER.instanceConfig("instance3")); + } + private class Mock { HelixDataAccessor dataAccessor; ConfigAccessor configAccessor; diff --git a/helix-rest/src/main/java/org/apache/helix/rest/clusterMaintenanceService/MaintenanceManagementService.java b/helix-rest/src/main/java/org/apache/helix/rest/clusterMaintenanceService/MaintenanceManagementService.java index c3fa04966f..0f617eea97 100644 --- a/helix-rest/src/main/java/org/apache/helix/rest/clusterMaintenanceService/MaintenanceManagementService.java +++ b/helix-rest/src/main/java/org/apache/helix/rest/clusterMaintenanceService/MaintenanceManagementService.java @@ -93,6 +93,10 @@ public class MaintenanceManagementService { private final HelixDataAccessorWrapper _dataAccessor; private final Set _nonBlockingHealthChecks; private final Set _skipHealthCheckCategories; + // Set the default value of _stoppableHealthCheckList to be the list of all stoppable checks to + // maintain the backward compatibility with users who don't use MaintenanceManagementServiceBuilder + // to create the MaintenanceManagementService object. + private List _stoppableHealthCheckList = HealthCheck.STOPPABLE_CHECK_LIST; public MaintenanceManagementService(ZKHelixDataAccessor dataAccessor, ConfigAccessor configAccessor, boolean skipZKRead, String namespace) { @@ -144,6 +148,26 @@ public MaintenanceManagementService(ZKHelixDataAccessor dataAccessor, _namespace = namespace; } + @VisibleForTesting + MaintenanceManagementService(MaintenanceManagementServiceBuilder builder) { + _dataAccessor = + new HelixDataAccessorWrapper(builder.getDataAccessor(), builder.getCustomRestClient(), + builder.getNamespace()); + _configAccessor = builder.getConfigAccessor(); + _customRestClient = builder.getCustomRestClient(); + _skipZKRead = builder.isSkipZKRead(); + _nonBlockingHealthChecks = + builder.isContinueOnFailure() ? Collections.singleton(ALL_HEALTH_CHECK_NONBLOCK) + : Collections.emptySet(); + _skipHealthCheckCategories = + builder.getSkipHealthCheckCategories() == null ? Collections.emptySet() + : builder.getSkipHealthCheckCategories(); + _stoppableHealthCheckList = + builder.getStoppableHealthCheckList() == null ? Collections.emptyList() + : builder.getStoppableHealthCheckList(); + _namespace = builder.getNamespace(); + } + /** * Perform health check and maintenance operation check and execution for a instance in * one cluster. @@ -613,7 +637,7 @@ private StoppableCheck performHelixOwnInstanceCheck(String clusterId, String ins Set toBeStoppedInstances) { LOG.info("Perform helix own custom health checks for {}/{}", clusterId, instanceName); Map helixStoppableCheck = - getInstanceHealthStatus(clusterId, instanceName, HealthCheck.STOPPABLE_CHECK_LIST, + getInstanceHealthStatus(clusterId, instanceName, _stoppableHealthCheckList, toBeStoppedInstances); return new StoppableCheck(helixStoppableCheck, StoppableCheck.Category.HELIX_OWN_CHECK); diff --git a/helix-rest/src/main/java/org/apache/helix/rest/clusterMaintenanceService/MaintenanceManagementServiceBuilder.java b/helix-rest/src/main/java/org/apache/helix/rest/clusterMaintenanceService/MaintenanceManagementServiceBuilder.java new file mode 100644 index 0000000000..9d4687e850 --- /dev/null +++ b/helix-rest/src/main/java/org/apache/helix/rest/clusterMaintenanceService/MaintenanceManagementServiceBuilder.java @@ -0,0 +1,127 @@ +package org.apache.helix.rest.clusterMaintenanceService; + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +import java.util.Collections; +import java.util.List; +import java.util.Set; + +import org.apache.helix.ConfigAccessor; +import org.apache.helix.manager.zk.ZKHelixDataAccessor; +import org.apache.helix.rest.client.CustomRestClient; +import org.apache.helix.rest.server.json.instance.StoppableCheck; + + +public class MaintenanceManagementServiceBuilder { + private ConfigAccessor _configAccessor; + private boolean _skipZKRead; + private String _namespace; + private ZKHelixDataAccessor _dataAccessor; + private CustomRestClient _customRestClient; + private boolean _continueOnFailure; + private Set _skipHealthCheckCategories = Collections.emptySet(); + private List _stoppableHealthCheckList = Collections.emptyList(); + + public ConfigAccessor getConfigAccessor() { + return _configAccessor; + } + + public boolean isSkipZKRead() { + return _skipZKRead; + } + + public String getNamespace() { + return _namespace; + } + + public ZKHelixDataAccessor getDataAccessor() { + return _dataAccessor; + } + + public CustomRestClient getCustomRestClient() { + return _customRestClient; + } + + public boolean isContinueOnFailure() { + return _continueOnFailure; + } + + public Set getSkipHealthCheckCategories() { + return _skipHealthCheckCategories; + } + + public List getStoppableHealthCheckList() { + return _stoppableHealthCheckList; + } + + public MaintenanceManagementServiceBuilder setConfigAccessor(ConfigAccessor configAccessor) { + _configAccessor = configAccessor; + return this; + } + + public MaintenanceManagementServiceBuilder setSkipZKRead(boolean skipZKRead) { + _skipZKRead = skipZKRead; + return this; + } + + public MaintenanceManagementServiceBuilder setNamespace(String namespace) { + _namespace = namespace; + return this; + } + + public MaintenanceManagementServiceBuilder setDataAccessor( + ZKHelixDataAccessor dataAccessor) { + _dataAccessor = dataAccessor; + return this; + } + + public MaintenanceManagementServiceBuilder setCustomRestClient( + CustomRestClient customRestClient) { + _customRestClient = customRestClient; + return this; + } + + public MaintenanceManagementServiceBuilder setContinueOnFailure(boolean continueOnFailure) { + _continueOnFailure = continueOnFailure; + return this; + } + + public MaintenanceManagementServiceBuilder setSkipHealthCheckCategories( + Set skipHealthCheckCategories) { + _skipHealthCheckCategories = skipHealthCheckCategories; + return this; + } + + public MaintenanceManagementServiceBuilder setStoppableHealthCheckList( + List stoppableHealthCheckList) { + _stoppableHealthCheckList = stoppableHealthCheckList; + return this; + } + + public MaintenanceManagementService createMaintenanceManagementService() { + if (_configAccessor == null || _namespace == null || _dataAccessor == null + || _customRestClient == null) { + throw new IllegalArgumentException( + "One or more of following mandatory arguments are not set: '_configAccessor', " + + "'_namespace', '_dataAccessor', '_customRestClient'."); + } + return new MaintenanceManagementService(this); + } +} \ No newline at end of file diff --git a/helix-rest/src/main/java/org/apache/helix/rest/clusterMaintenanceService/StoppableInstancesSelector.java b/helix-rest/src/main/java/org/apache/helix/rest/clusterMaintenanceService/StoppableInstancesSelector.java index 8cf8bc83cb..25036bed8e 100644 --- a/helix-rest/src/main/java/org/apache/helix/rest/clusterMaintenanceService/StoppableInstancesSelector.java +++ b/helix-rest/src/main/java/org/apache/helix/rest/clusterMaintenanceService/StoppableInstancesSelector.java @@ -66,7 +66,7 @@ public StoppableInstancesSelector(String clusterId, List orderOfZone, * reasons for non-stoppability. * * @param instances A list of instance to be evaluated. - * @param toBeStoppedInstances A list of instances presumed to be are already stopped + * @param toBeStoppedInstances A list of instances presumed to be already stopped * @return An ObjectNode containing: * - 'stoppableNode': List of instances that can be stopped. * - 'instance_not_stoppable_with_reasons': A map with the instance name as the key and @@ -97,7 +97,7 @@ public ObjectNode getStoppableInstancesInSingleZone(List instances, * non-stoppability. * * @param instances A list of instance to be evaluated. - * @param toBeStoppedInstances A list of instances presumed to be are already stopped + * @param toBeStoppedInstances A list of instances presumed to be already stopped * @return An ObjectNode containing: * - 'stoppableNode': List of instances that can be stopped. * - 'instance_not_stoppable_with_reasons': A map with the instance name as the key and diff --git a/helix-rest/src/main/java/org/apache/helix/rest/server/resources/helix/InstancesAccessor.java b/helix-rest/src/main/java/org/apache/helix/rest/server/resources/helix/InstancesAccessor.java index 785195ebe1..2f40c6cf0d 100644 --- a/helix-rest/src/main/java/org/apache/helix/rest/server/resources/helix/InstancesAccessor.java +++ b/helix-rest/src/main/java/org/apache/helix/rest/server/resources/helix/InstancesAccessor.java @@ -20,12 +20,12 @@ */ import java.io.IOException; -import java.util.ArrayList; import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; +import java.util.stream.Collectors; import javax.ws.rs.DefaultValue; import javax.ws.rs.GET; import javax.ws.rs.POST; @@ -40,13 +40,17 @@ import com.fasterxml.jackson.databind.node.ArrayNode; import com.fasterxml.jackson.databind.node.JsonNodeFactory; import com.fasterxml.jackson.databind.node.ObjectNode; +import com.google.common.collect.ImmutableList; import org.apache.helix.HelixAdmin; import org.apache.helix.HelixDataAccessor; import org.apache.helix.HelixException; import org.apache.helix.manager.zk.ZKHelixDataAccessor; import org.apache.helix.model.ClusterConfig; import org.apache.helix.model.InstanceConfig; +import org.apache.helix.rest.client.CustomRestClientFactory; +import org.apache.helix.rest.clusterMaintenanceService.HealthCheck; import org.apache.helix.rest.clusterMaintenanceService.MaintenanceManagementService; +import org.apache.helix.rest.clusterMaintenanceService.MaintenanceManagementServiceBuilder; import org.apache.helix.rest.common.HttpConstants; import org.apache.helix.rest.clusterMaintenanceService.StoppableInstancesSelector; import org.apache.helix.rest.server.filters.ClusterAuth; @@ -63,6 +67,11 @@ @Path("/clusters/{clusterId}/instances") public class InstancesAccessor extends AbstractHelixResource { private final static Logger _logger = LoggerFactory.getLogger(InstancesAccessor.class); + // This parameter indicates the users would like perform all existing helix checks on the + // given instances list. + private final static String SELECT_ALL_STOPPABLE_CHECKS = "ALL"; + private final static List ALL_STOPPABLE_CHECK_LIST = + ImmutableList.of(SELECT_ALL_STOPPABLE_CHECKS); public enum InstancesProperties { instances, online, @@ -70,6 +79,7 @@ public enum InstancesProperties { selection_base, zone_order, to_be_stopped_instances, + stoppable_check_list, customized_values, instance_stoppable_parallel, instance_not_stoppable_with_reasons @@ -228,6 +238,7 @@ private Response batchGetStoppableInstances(String clusterId, JsonNode node, boo List orderOfZone = null; String customizedInput = null; List toBeStoppedInstances = Collections.emptyList(); + List stoppableCheckList = null; if (node.get(InstancesAccessor.InstancesProperties.customized_values.name()) != null) { customizedInput = node.get(InstancesAccessor.InstancesProperties.customized_values.name()).toString(); @@ -260,10 +271,44 @@ private Response batchGetStoppableInstances(String clusterId, JsonNode node, boo } } + if (node.get(InstancesProperties.stoppable_check_list.name()) != null) { + List list = OBJECT_MAPPER.readValue( + node.get(InstancesProperties.stoppable_check_list.name()).toString(), + OBJECT_MAPPER.getTypeFactory().constructCollectionType(List.class, String.class)); + + if (ALL_STOPPABLE_CHECK_LIST.equals(list)) { + stoppableCheckList = HealthCheck.STOPPABLE_CHECK_LIST; + } else { + try { + stoppableCheckList = + list.stream().map(HealthCheck::valueOf).collect(Collectors.toList()); + } catch (IllegalArgumentException e) { + String message = + "'stoppable_check_list' has invalid check names: " + list + + ". Supported checks: " + HealthCheck.STOPPABLE_CHECK_LIST; + _logger.error(message, e); + return badRequest(message); + } + } + } else { + // By default, if stoppable_check_list is unset, all checks are performed to maintain + // backward compatibility with existing clients. + stoppableCheckList = HealthCheck.STOPPABLE_CHECK_LIST; + } + + String namespace = getNamespace(); MaintenanceManagementService maintenanceService = - new MaintenanceManagementService((ZKHelixDataAccessor) getDataAccssor(clusterId), - getConfigAccessor(), skipZKRead, continueOnFailures, skipHealthCheckCategories, - getNamespace()); + new MaintenanceManagementServiceBuilder() + .setDataAccessor((ZKHelixDataAccessor) getDataAccssor(clusterId)) + .setConfigAccessor(getConfigAccessor()) + .setSkipZKRead(skipZKRead) + .setContinueOnFailure(continueOnFailures) + .setCustomRestClient(CustomRestClientFactory.get()) + .setSkipHealthCheckCategories(skipHealthCheckCategories) + .setNamespace(namespace) + .setStoppableHealthCheckList(stoppableCheckList) + .createMaintenanceManagementService(); + ClusterService clusterService = new ClusterServiceImpl(getDataAccssor(clusterId), getConfigAccessor()); ClusterTopology clusterTopology = clusterService.getClusterTopology(clusterId); diff --git a/helix-rest/src/test/java/org/apache/helix/rest/clusterMaintenanceService/TestMaintenanceManagementService.java b/helix-rest/src/test/java/org/apache/helix/rest/clusterMaintenanceService/TestMaintenanceManagementService.java index a49a95066f..2b530ff733 100644 --- a/helix-rest/src/test/java/org/apache/helix/rest/clusterMaintenanceService/TestMaintenanceManagementService.java +++ b/helix-rest/src/test/java/org/apache/helix/rest/clusterMaintenanceService/TestMaintenanceManagementService.java @@ -39,6 +39,7 @@ import org.apache.helix.manager.zk.ZkBaseDataAccessor; import org.apache.helix.model.ExternalView; import org.apache.helix.model.IdealState; +import org.apache.helix.model.InstanceConfig; import org.apache.helix.model.LeaderStandbySMD; import org.apache.helix.model.MasterSlaveSMD; import org.apache.helix.model.RESTConfig; @@ -60,6 +61,7 @@ import static org.mockito.Matchers.anyList; import static org.mockito.Matchers.anyMap; import static org.mockito.Matchers.anyString; +import static org.mockito.Mockito.doReturn; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; @@ -219,7 +221,8 @@ public void testGetInstanceStoppableCheckWhenCustomInstanceCheckDisabled() throw when(_dataAccessorWrapper.getAllPartitionsHealthOnLiveInstance(any(), anyMap(), anyBoolean())).thenReturn(Collections.emptyMap()); when(_dataAccessorWrapper.getProperty((PropertyKey) any())).thenReturn(new LeaderStandbySMD()); - when(_dataAccessorWrapper.keyBuilder()).thenReturn(new PropertyKey.Builder(TEST_CLUSTER)); + PropertyKey.Builder builder = new PropertyKey.Builder(TEST_CLUSTER); + when(_dataAccessorWrapper.keyBuilder()).thenReturn(builder); when(_dataAccessorWrapper.getChildValues(any(), anyBoolean())).thenReturn( Arrays.asList(externalView)); MockMaintenanceManagementService service = @@ -228,6 +231,14 @@ public void testGetInstanceStoppableCheckWhenCustomInstanceCheckDisabled() throw new HashSet<>(Arrays.asList(StoppableCheck.Category.CUSTOM_INSTANCE_CHECK)), HelixRestNamespace.DEFAULT_NAMESPACE_NAME); + // set default instance config + InstanceConfig instanceConfig1 = new InstanceConfig(TEST_INSTANCE); + doReturn(instanceConfig1).when(_dataAccessorWrapper) + .getProperty(builder.instanceConfig(TEST_INSTANCE)); + InstanceConfig instanceConfig2 = new InstanceConfig("sibling_instance"); + doReturn(instanceConfig2).when(_dataAccessorWrapper) + .getProperty(builder.instanceConfig("sibling_instance")); + StoppableCheck actual = service.getInstanceStoppableCheck(TEST_CLUSTER, TEST_INSTANCE, ""); List expectedFailedChecks = Arrays.asList( StoppableCheck.Category.CUSTOM_PARTITION_CHECK.getPrefix() @@ -311,6 +322,14 @@ public void testCustomPartitionCheckWithSkipZKRead() throws IOException { when(mockAccessor.get(zkHelixDataAccessor.keyBuilder().stateModelDef("MasterSlave").getPath(), new Stat(), AccessOption.PERSISTENT)).thenReturn(MasterSlaveSMD.build().getRecord()); + // set default instance config + InstanceConfig instanceConfig1 = new InstanceConfig(TEST_INSTANCE); + when(mockAccessor.get(zkHelixDataAccessor.keyBuilder().instanceConfig(TEST_INSTANCE).getPath(), + new Stat(), AccessOption.PERSISTENT)).thenReturn(instanceConfig1.getRecord()); + InstanceConfig instanceConfig2 = new InstanceConfig(siblingInstance); + when(mockAccessor.get(zkHelixDataAccessor.keyBuilder().instanceConfig(siblingInstance).getPath(), + new Stat(), AccessOption.PERSISTENT)).thenReturn(instanceConfig2.getRecord()); + // Valid data only from ZK, pass the check MockMaintenanceManagementService instanceServiceReadZK = new MockMaintenanceManagementService(zkHelixDataAccessor, _configAccessor, _customRestClient, false, diff --git a/helix-rest/src/test/java/org/apache/helix/rest/server/AbstractTestClass.java b/helix-rest/src/test/java/org/apache/helix/rest/server/AbstractTestClass.java index 68561ce839..71fdc76cff 100644 --- a/helix-rest/src/test/java/org/apache/helix/rest/server/AbstractTestClass.java +++ b/helix-rest/src/test/java/org/apache/helix/rest/server/AbstractTestClass.java @@ -564,6 +564,8 @@ private void preSetupForParallelInstancesStoppableTest(String clusterName, clusterConfig.setFaultZoneType("helixZoneId"); clusterConfig.setPersistIntermediateAssignment(true); _configAccessor.setClusterConfig(clusterName, clusterConfig); + RESTConfig emptyRestConfig = new RESTConfig(clusterName); + _configAccessor.setRESTConfig(clusterName, emptyRestConfig); // Create instance configs List instanceConfigs = new ArrayList<>(); for (int i = 0; i < instances.size() - 1; i++) { diff --git a/helix-rest/src/test/java/org/apache/helix/rest/server/TestInstancesAccessor.java b/helix-rest/src/test/java/org/apache/helix/rest/server/TestInstancesAccessor.java index 2bc539a4d4..089bac12d8 100644 --- a/helix-rest/src/test/java/org/apache/helix/rest/server/TestInstancesAccessor.java +++ b/helix-rest/src/test/java/org/apache/helix/rest/server/TestInstancesAccessor.java @@ -199,6 +199,66 @@ public void testCrossZoneStoppableWithoutZoneOrder() throws IOException { System.out.println("End test :" + TestHelper.getTestMethodName()); } + @Test + public void testInstanceStoppable_crosszonebased_with_selectedchecklist() throws IOException { + System.out.println("Start test :" + TestHelper.getTestMethodName()); + // Select instances with cross zone based and perform all checks + String content = + String.format("{\"%s\":\"%s\",\"%s\":[\"%s\",\"%s\",\"%s\",\"%s\",\"%s\",\"%s\", \"%s\"], \"%s\":[\"%s\"]}", + InstancesAccessor.InstancesProperties.selection_base.name(), + InstancesAccessor.InstanceHealthSelectionBase.cross_zone_based.name(), + InstancesAccessor.InstancesProperties.instances.name(), "instance0", "instance1", + "instance2", "instance3", "instance4", "instance5", "invalidInstance", + InstancesAccessor.InstancesProperties.stoppable_check_list.name(), "ALL"); + Response response = + new JerseyUriRequestBuilder("clusters/{}/instances?command=stoppable").format( + STOPPABLE_CLUSTER).post(this, Entity.entity(content, MediaType.APPLICATION_JSON_TYPE)); + JsonNode jsonNode = OBJECT_MAPPER.readTree(response.readEntity(String.class)); + Assert.assertFalse( + jsonNode.withArray(InstancesAccessor.InstancesProperties.instance_stoppable_parallel.name()) + .elements().hasNext()); + JsonNode nonStoppableInstances = jsonNode.get( + InstancesAccessor.InstancesProperties.instance_not_stoppable_with_reasons.name()); + Assert.assertEquals(getStringSet(nonStoppableInstances, "instance0"), + ImmutableSet.of("HELIX:MIN_ACTIVE_REPLICA_CHECK_FAILED")); + Assert.assertEquals(getStringSet(nonStoppableInstances, "instance1"), + ImmutableSet.of("HELIX:EMPTY_RESOURCE_ASSIGNMENT", "HELIX:INSTANCE_NOT_ENABLED", + "HELIX:INSTANCE_NOT_STABLE")); + Assert.assertEquals(getStringSet(nonStoppableInstances, "instance2"), + ImmutableSet.of("HELIX:MIN_ACTIVE_REPLICA_CHECK_FAILED")); + Assert.assertEquals(getStringSet(nonStoppableInstances, "instance3"), + ImmutableSet.of("HELIX:HAS_DISABLED_PARTITION", "HELIX:MIN_ACTIVE_REPLICA_CHECK_FAILED")); + Assert.assertEquals(getStringSet(nonStoppableInstances, "instance4"), + ImmutableSet.of("HELIX:EMPTY_RESOURCE_ASSIGNMENT", "HELIX:INSTANCE_NOT_ALIVE", + "HELIX:INSTANCE_NOT_STABLE")); + Assert.assertEquals(getStringSet(nonStoppableInstances, "invalidInstance"), ImmutableSet.of("HELIX:INSTANCE_NOT_EXIST")); + + // Select instances with cross zone based and perform a subset of checks + content = String.format( + "{\"%s\":\"%s\",\"%s\":[\"%s\",\"%s\",\"%s\",\"%s\",\"%s\",\"%s\", \"%s\"], \"%s\":[\"%s\",\"%s\"], \"%s\":[\"%s\", \"%s\"]}", + InstancesAccessor.InstancesProperties.selection_base.name(), + InstancesAccessor.InstanceHealthSelectionBase.cross_zone_based.name(), + InstancesAccessor.InstancesProperties.instances.name(), "instance0", "instance1", + "instance2", "instance3", "instance4", "instance5", "invalidInstance", + InstancesAccessor.InstancesProperties.zone_order.name(), "zone2", "zone1", + InstancesAccessor.InstancesProperties.stoppable_check_list.name(), "EMPTY_RESOURCE_ASSIGNMENT", "INSTANCE_NOT_ALIVE"); + response = new JerseyUriRequestBuilder( + "clusters/{}/instances?command=stoppable&skipHealthCheckCategories=CUSTOM_INSTANCE_CHECK,CUSTOM_PARTITION_CHECK").format( + STOPPABLE_CLUSTER).post(this, Entity.entity(content, MediaType.APPLICATION_JSON_TYPE)); + jsonNode = OBJECT_MAPPER.readTree(response.readEntity(String.class)); + nonStoppableInstances = jsonNode.get( + InstancesAccessor.InstancesProperties.instance_not_stoppable_with_reasons.name()); + Assert.assertEquals(getStringSet(nonStoppableInstances, "instance5"), + ImmutableSet.of("HELIX:EMPTY_RESOURCE_ASSIGNMENT", "HELIX:INSTANCE_NOT_ALIVE")); + Assert.assertEquals(getStringSet(nonStoppableInstances, "instance4"), + ImmutableSet.of("HELIX:EMPTY_RESOURCE_ASSIGNMENT", "HELIX:INSTANCE_NOT_ALIVE")); + Assert.assertEquals(getStringSet(nonStoppableInstances, "instance1"), + ImmutableSet.of("HELIX:EMPTY_RESOURCE_ASSIGNMENT")); + Assert.assertEquals(getStringSet(nonStoppableInstances, "invalidInstance"), + ImmutableSet.of("HELIX:INSTANCE_NOT_EXIST")); + + System.out.println("End test :" + TestHelper.getTestMethodName()); + } @Test(dependsOnMethods = "testInstanceStoppableZoneBasedWithToBeStoppedInstances") public void testInstanceStoppable_zoneBased_zoneOrder() throws IOException { diff --git a/helix-rest/src/test/java/org/apache/helix/rest/server/util/TestInstanceValidationUtilInRest.java b/helix-rest/src/test/java/org/apache/helix/rest/server/util/TestInstanceValidationUtilInRest.java index e37da34ff6..5754e77450 100644 --- a/helix-rest/src/test/java/org/apache/helix/rest/server/util/TestInstanceValidationUtilInRest.java +++ b/helix-rest/src/test/java/org/apache/helix/rest/server/util/TestInstanceValidationUtilInRest.java @@ -30,18 +30,21 @@ import org.apache.helix.HelixDataAccessor; import org.apache.helix.PropertyKey; import org.apache.helix.model.ExternalView; +import org.apache.helix.model.InstanceConfig; import org.apache.helix.model.MasterSlaveSMD; import org.apache.helix.model.StateModelDefinition; import org.apache.helix.util.InstanceValidationUtil; import org.junit.Assert; import org.testng.annotations.Test; +import static org.mockito.Mockito.doReturn; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; public class TestInstanceValidationUtilInRest{ private static final String RESOURCE_NAME = "TestResource"; private static final String TEST_CLUSTER = "TestCluster"; + private static final PropertyKey.Builder BUILDER = new PropertyKey.Builder(TEST_CLUSTER); @Test public void testPartitionLevelCheck() { @@ -49,12 +52,11 @@ public void testPartitionLevelCheck() { Mock mock = new Mock(); HelixDataAccessor accessor = mock.dataAccessor; - when(mock.dataAccessor.keyBuilder()) - .thenReturn(new PropertyKey.Builder(TEST_CLUSTER)); - when(mock.dataAccessor - .getProperty(new PropertyKey.Builder(TEST_CLUSTER).stateModelDef(MasterSlaveSMD.name))) + when(mock.dataAccessor.keyBuilder()).thenReturn(BUILDER); + when(mock.dataAccessor.getProperty(BUILDER.stateModelDef(MasterSlaveSMD.name))) .thenReturn(mock.stateModel); when(mock.stateModel.getTopState()).thenReturn("MASTER"); + setDefaultInstanceConfigs(mock); Map> failedPartitions = InstanceValidationUtil .perPartitionHealthCheck(externalViews, preparePartitionStateMap(), "h2", accessor); @@ -68,13 +70,12 @@ public void testPartitionLevelCheckInitState() { Mock mock = new Mock(); HelixDataAccessor accessor = mock.dataAccessor; - when(mock.dataAccessor.keyBuilder()) - .thenReturn(new PropertyKey.Builder(TEST_CLUSTER)); - when(mock.dataAccessor - .getProperty(new PropertyKey.Builder(TEST_CLUSTER).stateModelDef(MasterSlaveSMD.name))) + when(mock.dataAccessor.keyBuilder()).thenReturn(BUILDER); + when(mock.dataAccessor.getProperty(BUILDER.stateModelDef(MasterSlaveSMD.name))) .thenReturn(mock.stateModel); when(mock.stateModel.getTopState()).thenReturn("MASTER"); when(mock.stateModel.getInitialState()).thenReturn("OFFLINE"); + setDefaultInstanceConfigs(mock); Map> partitionStateMap = new HashMap<>(); partitionStateMap.put("h1", new HashMap<>()); @@ -111,13 +112,12 @@ public void testPartitionLevelCheckWithToBeStoppedNode() { Mock mock = new Mock(); HelixDataAccessor accessor = mock.dataAccessor; - when(mock.dataAccessor.keyBuilder()) - .thenReturn(new PropertyKey.Builder(TEST_CLUSTER)); - when(mock.dataAccessor - .getProperty(new PropertyKey.Builder(TEST_CLUSTER).stateModelDef(MasterSlaveSMD.name))) + when(mock.dataAccessor.keyBuilder()).thenReturn(BUILDER); + when(mock.dataAccessor.getProperty(BUILDER.stateModelDef(MasterSlaveSMD.name))) .thenReturn(mock.stateModel); when(mock.stateModel.getTopState()).thenReturn("MASTER"); when(mock.stateModel.getInitialState()).thenReturn("OFFLINE"); + setDefaultInstanceConfigs(mock); Map> partitionStateMap = new HashMap<>(); partitionStateMap.put("h1", new HashMap<>()); @@ -227,6 +227,21 @@ private ExternalView prepareExternalViewOnline() { return externalView; } + private void setDefaultInstanceConfigs(Mock mock) { + InstanceConfig instanceConfig1 = new InstanceConfig("h1"); + doReturn(instanceConfig1).when(mock.dataAccessor) + .getProperty(BUILDER.instanceConfig("h1")); + InstanceConfig instanceConfig2 = new InstanceConfig("h2"); + doReturn(instanceConfig2).when(mock.dataAccessor) + .getProperty(BUILDER.instanceConfig("h2")); + InstanceConfig instanceConfig3 = new InstanceConfig("h3"); + doReturn(instanceConfig3).when(mock.dataAccessor) + .getProperty(BUILDER.instanceConfig("h3")); + InstanceConfig instanceConfig4 = new InstanceConfig("h4"); + doReturn(instanceConfig4).when(mock.dataAccessor) + .getProperty(BUILDER.instanceConfig("h4")); + } + private final class Mock { private HelixDataAccessor dataAccessor = mock(HelixDataAccessor.class); private StateModelDefinition stateModel = mock(StateModelDefinition.class);