Skip to content

Commit

Permalink
Merge pull request #6 from linkedin/master
Browse files Browse the repository at this point in the history
Release for helix 1.3.2-dev-202406121430
  • Loading branch information
zpinto authored Jun 12, 2024
2 parents 4c52f19 + 55bdc08 commit 46c4b37
Show file tree
Hide file tree
Showing 94 changed files with 2,429 additions and 1,233 deletions.
Original file line number Diff line number Diff line change
@@ -1,17 +1,103 @@
package org.apache.helix.constants;

import java.util.Set;

import com.google.common.collect.ImmutableSet;

public class InstanceConstants {
/**
* String constant whose literal value is "INSTANCE_NOT_DISABLED".
* NOTE(review): presumably used as a placeholder when an instance is not disabled; confirm
* against callers.
*/
public static final String INSTANCE_NOT_DISABLED = "INSTANCE_NOT_DISABLED";

/**
* The set of InstanceOperations under which an instance is allowed to be assigned replicas by
* the rebalancer.
*/
public static final Set<InstanceOperation> ASSIGNABLE_INSTANCE_OPERATIONS =
ImmutableSet.of(InstanceOperation.ENABLE, InstanceOperation.DISABLE);


/**
* The set of InstanceOperations that are overridden when the deprecated HELIX_ENABLED field is
* set to false. This maintains backwards compatibility with the deprecated field.
* TODO: Remove this when the deprecated HELIX_ENABLED is removed.
*/
public static final Set<InstanceOperation> INSTANCE_DISABLED_OVERRIDABLE_OPERATIONS =
ImmutableSet.of(InstanceOperation.ENABLE, InstanceOperation.EVACUATE);


/**
* The set of InstanceOperations that are not allowed to be populated in the RoutingTableProvider.
*/
public static final Set<InstanceOperation> UNROUTABLE_INSTANCE_OPERATIONS =
ImmutableSet.of(InstanceOperation.SWAP_IN, InstanceOperation.UNKNOWN);

/**
* Legacy disabled-type values for an instance. Each value can be converted to an
* {@link InstanceOperationSource} with
* {@link InstanceOperationSource#instanceDisabledTypeToInstanceOperationSource(InstanceDisabledType)}.
*
* @deprecated appears to be superseded by {@link InstanceOperationSource} (TODO confirm).
*/
@Deprecated
public enum InstanceDisabledType {
// Converted to InstanceOperationSource.AUTOMATION.
CLOUD_EVENT,
// Converted to InstanceOperationSource.USER.
USER_OPERATION,
// Converted to InstanceOperationSource.DEFAULT (handled by the default branch of the conversion).
DEFAULT_INSTANCE_DISABLE_TYPE
}

/**
 * Source of an instance operation. Every source carries an integer priority:
 * ADMIN = 0, USER = 1, AUTOMATION = 2, DEFAULT = 3.
 */
public enum InstanceOperationSource {
  ADMIN(0),
  USER(1),
  AUTOMATION(2),
  DEFAULT(3);

  private final int _priority;

  InstanceOperationSource(int priority) {
    this._priority = priority;
  }

  /**
   * @return the integer priority this source was declared with
   */
  public int getPriority() {
    return this._priority;
  }

  /**
   * Convert a legacy InstanceDisabledType to the matching InstanceOperationSource.
   * CLOUD_EVENT maps to AUTOMATION, USER_OPERATION maps to USER, and every other value maps
   * to DEFAULT.
   *
   * @param disabledType the InstanceDisabledType to convert
   * @return the corresponding InstanceOperationSource
   * @throws NullPointerException if disabledType is null (switching on a null enum)
   */
  public static InstanceOperationSource instanceDisabledTypeToInstanceOperationSource(
      InstanceDisabledType disabledType) {
    final InstanceOperationSource converted;
    switch (disabledType) {
      case CLOUD_EVENT:
        converted = AUTOMATION;
        break;
      case USER_OPERATION:
        converted = USER;
        break;
      default:
        converted = DEFAULT;
        break;
    }
    return converted;
  }
}

/**
* Operations that can be set on an instance. The Javadoc on each constant describes the behavior
* of the operation and the final state it leads to.
*/
public enum InstanceOperation {
// NOTE(review): the next three constants duplicate EVACUATE/SWAP_IN below and SWAP_OUT has no
// trailing comma; they look like lines from a previous revision - confirm before relying on them.
EVACUATE, // Node will be removed after a period of time
SWAP_IN, // New node joining for swap operation
SWAP_OUT // Existing Node to be removed for swap operation
/**
* Behavior: Replicas will be assigned to the node and will receive upward state transitions
* for new assignments and downward state transitions if replicas are being moved elsewhere.
* Final State: The node will have replicas assigned to it and will be considered for future assignment.
*/
ENABLE,
/**
* Behavior: All replicas on the node will be set to OFFLINE.
* Final State: The node will have all replicas in the OFFLINE state and can't take new assignment.
*/
DISABLE,
/**
* Behavior: All replicas will be moved off the node, after a replacement has been bootstrapped
* in another node in the cluster.
* Final State: The node will not contain any replicas and will not be considered for *NEW* assignment.
*/
EVACUATE,
/**
* Behavior: Node will have all replicas on its corresponding (same logicalId) swap-out node bootstrapped
* (ERROR and OFFLINE replicas on swap-out node will not be bootstrapped) to the same states if the StateModelDef allows.
* This node will be excluded from the RoutingTableProvider.
* Final State: This node will be a mirror of the swap-out node, will not be considered for assignment, and will not be populated
* in the RoutingTableProvider.
*/
SWAP_IN,
/**
* Behavior: Node will have all of its replicas dropped immediately and will be removed from the RoutingTableProvider.
* Final State: Node will not hold replicas, be considered for assignment, or be populated in the RoutingTableProvider.
*/
UNKNOWN
}
}
47 changes: 44 additions & 3 deletions helix-core/src/main/java/org/apache/helix/HelixAdmin.java
Original file line number Diff line number Diff line change
Expand Up @@ -281,9 +281,11 @@ void addResource(String clusterName, String resourceName, int numPartitions, Str
* @param instanceName
* @param enabled
*/
@Deprecated
void enableInstance(String clusterName, String instanceName, boolean enabled);

/**
* @deprecated use {@link #setInstanceOperation(String, String, InstanceConstants.InstanceOperation)}
* @param clusterName
* @param instanceName
* @param enabled
Expand All @@ -292,27 +294,54 @@ void addResource(String clusterName, String resourceName, int numPartitions, Str
* @param reason set additional string description on why the instance is disabled when
* <code>enabled</code> is false. The existing disabled reason will be overwritten if the instance is in disabled state.
*/
@Deprecated
void enableInstance(String clusterName, String instanceName, boolean enabled,
InstanceConstants.InstanceDisabledType disabledType, String reason);

/**
* Batch enable/disable instances in a cluster.
* By default, all the instances are enabled.
* @deprecated use {@link #setInstanceOperation(String, String, InstanceConstants.InstanceOperation)}
* @param clusterName The cluster name
* @param instances The names of the instances to enable or disable
* @param enabled true to enable the instances, false to disable them
*/
@Deprecated
void enableInstance(String clusterName, List<String> instances, boolean enabled);

/**
* Set the instanceOperation field.
* Set the instanceOperation of an instance with {@link InstanceConstants.InstanceOperation}.
*
* @param clusterName The cluster name
* @param instanceName The instance name
* @param instanceOperation The instance operation
* @param instanceOperation The instance operation type
*/
void setInstanceOperation(String clusterName, String instanceName,
@Nullable InstanceConstants.InstanceOperation instanceOperation);
InstanceConstants.InstanceOperation instanceOperation);

/**
* Set the instanceOperation of an instance with {@link InstanceConstants.InstanceOperation}.
*
* @param clusterName The cluster name
* @param instanceName The instance name
* @param instanceOperation The instance operation type
* @param reason The reason for the operation
*/
void setInstanceOperation(String clusterName, String instanceName,
InstanceConstants.InstanceOperation instanceOperation, String reason);

/**
* Set the instanceOperation of an instance with {@link InstanceConstants.InstanceOperation}.
*
* @param clusterName The cluster name
* @param instanceName The instance name
* @param instanceOperation The instance operation type
* @param reason The reason for the operation
* @param overrideAll Whether to override all existing instance operations from all other
* instance operations
*/
void setInstanceOperation(String clusterName, String instanceName,
InstanceConstants.InstanceOperation instanceOperation, String reason, boolean overrideAll);

/**
* Disable or enable a resource
Expand Down Expand Up @@ -415,6 +444,18 @@ void manuallyEnableMaintenanceMode(String clusterName, boolean enabled, String r
*/
ClusterManagementMode getClusterManagementMode(String clusterName);

/**
* Set a list of partitions for an instance to ERROR state from any state.
* The partitions could be in any state and setPartitionsToError will bring them to ERROR
* state. ANY to ERROR state transition is required for this.
* @param clusterName The cluster name
* @param instanceName The instance name
* @param resourceName The resource name
* @param partitionNames The names of the partitions to set to ERROR state
*/
void setPartitionsToError(String clusterName, String instanceName, String resourceName,
List<String> partitionNames);

/**
* Reset a list of partitions in error state for an instance
* The partitions are assumed to be in error state and reset will bring them from error
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
import org.apache.helix.HelixManager;
import org.apache.helix.constants.InstanceConstants;
import org.apache.helix.model.ClusterConfig;
import org.apache.helix.model.InstanceConfig;
import org.apache.helix.util.InstanceUtil;
import org.apache.helix.util.InstanceValidationUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand All @@ -49,9 +51,14 @@ public void disableInstance(HelixManager manager, Object eventInfo) {
LOG.info("DefaultCloudEventCallbackImpl disable Instance {}", manager.getInstanceName());
if (InstanceValidationUtil
.isEnabled(manager.getHelixDataAccessor(), manager.getInstanceName())) {
manager.getClusterManagmentTool()
.enableInstance(manager.getClusterName(), manager.getInstanceName(), false,
InstanceConstants.InstanceDisabledType.CLOUD_EVENT, message);
InstanceUtil.setInstanceOperation(manager.getConfigAccessor(),
manager.getHelixDataAccessor().getBaseDataAccessor(), manager.getClusterName(),
manager.getInstanceName(),
new InstanceConfig.InstanceOperation.Builder().setOperation(
InstanceConstants.InstanceOperation.DISABLE)
.setSource(InstanceConstants.InstanceOperationSource.AUTOMATION)
.setReason(message)
.build());
}
HelixEventHandlingUtil.updateCloudEventOperationInClusterConfig(manager.getClusterName(),
manager.getInstanceName(), manager.getHelixDataAccessor().getBaseDataAccessor(), false,
Expand All @@ -72,10 +79,13 @@ public void enableInstance(HelixManager manager, Object eventInfo) {
HelixEventHandlingUtil
.updateCloudEventOperationInClusterConfig(manager.getClusterName(), instanceName,
manager.getHelixDataAccessor().getBaseDataAccessor(), true, message);
if (HelixEventHandlingUtil.isInstanceDisabledForCloudEvent(instanceName, accessor)) {
manager.getClusterManagmentTool().enableInstance(manager.getClusterName(), instanceName, true,
InstanceConstants.InstanceDisabledType.CLOUD_EVENT, message);
}
InstanceUtil.setInstanceOperation(manager.getConfigAccessor(),
manager.getHelixDataAccessor().getBaseDataAccessor(), manager.getClusterName(),
manager.getInstanceName(),
new InstanceConfig.InstanceOperation.Builder().setOperation(
InstanceConstants.InstanceOperation.ENABLE)
.setSource(InstanceConstants.InstanceOperationSource.AUTOMATION).setReason(message)
.build());
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,10 @@ class HelixEventHandlingUtil {
* @param dataAccessor
* @return return true only when instance is Helix disabled and the disabled reason in
* instanceConfig is cloudEvent
* @deprecated No need to check this if using InstanceOperation and specifying the source as
* AUTOMATION when enabling.
*/
@Deprecated
static boolean isInstanceDisabledForCloudEvent(String instanceName,
HelixDataAccessor dataAccessor) {
InstanceConfig instanceConfig =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
* under the License.
*/

import java.util.HashMap;
import java.util.Map;
import java.util.Set;

Expand Down Expand Up @@ -62,6 +63,21 @@ protected Map<FieldType, Set<String>> getNonTrimmableFields(InstanceConfig insta
return STATIC_TOPOLOGY_RELATED_FIELD_MAP;
}

/**
 * Returns the non-trimmable keys computed by the parent implementation, with
 * HELIX_INSTANCE_OPERATIONS removed from the LIST_FIELD key set so that the field is trimmed.
 * The field is used to filter instances in the BaseControllerDataProvider, and that filtering
 * (rather than the actual value of the field) is used to determine whether the
 * ResourceChangeSnapshot has changed.
 *
 * NOTE(review): this delegates to super.getNonTrimmableKeys and so appears to override it;
 * consider adding @Override once the parent signature is confirmed.
 *
 * @param property the instance config
 * @return the parent's map of non-trimmable field keys, modified in place as described above
 */
protected Map<FieldType, Set<String>> getNonTrimmableKeys(InstanceConfig property) {
  final Map<FieldType, Set<String>> keysToKeep = super.getNonTrimmableKeys(property);
  // Drop the instance-operations list field from the keep-set so it gets trimmed.
  final Set<String> listFieldKeys = keysToKeep.get(FieldType.LIST_FIELD);
  listFieldKeys.remove(InstanceConfigProperty.HELIX_INSTANCE_OPERATIONS.name());
  return keysToKeep;
}

@Override
public InstanceConfig trimProperty(InstanceConfig property) {
return new InstanceConfig(doTrim(property));
Expand Down
Loading

0 comments on commit 46c4b37

Please sign in to comment.