Skip to content

Commit

Permalink
[Backport 2.17] ClusterManagerTaskThrottler Improvements (#15508) (#1…
Browse files Browse the repository at this point in the history
…5671)

* ClusterManagerTaskThrottler Improvements (#15508)

* [ClusterManagerTaskThrottler Improvements] : Add shallow check in ClusterManagerTaskThrottler to fail fast before computeIfPresent to avoid lock when queue is full

Signed-off-by: Sumit Bansal <[email protected]>
  • Loading branch information
sumitasr authored Sep 5, 2024
1 parent e76dcf9 commit cd77e41
Show file tree
Hide file tree
Showing 4 changed files with 234 additions and 35 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- [Remote Publication] Added checksum validation for cluster state behind a cluster setting ([#15218](https://github.com/opensearch-project/OpenSearch/pull/15218))
- Optimize NodeIndicesStats output behind flag ([#14454](https://github.com/opensearch-project/OpenSearch/pull/14454))
- Add support for comma-separated list of index names to be used with Snapshot Status API ([#15409](https://github.com/opensearch-project/OpenSearch/pull/15409))[SnapshotV2] Snapshot Status API changes (#15409))
- ClusterManagerTaskThrottler Improvements ([#15508](https://github.com/opensearch-project/OpenSearch/pull/15508))
- Relax the join validation for Remote State publication ([#15471](https://github.com/opensearch-project/OpenSearch/pull/15471))

### Dependencies
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
* <p>
* Set specific setting to for setting the threshold of throttling of particular task type.
* e.g : Set "cluster_manager.throttling.thresholds.put_mapping" to set throttling limit of "put mapping" tasks,
* Set it to default value(-1) to disable the throttling for this task type.
* Set it to default value(-1) to disable the throttling for this task type.
*/
public class ClusterManagerTaskThrottler implements TaskBatcherListener {
private static final Logger logger = LogManager.getLogger(ClusterManagerTaskThrottler.class);
Expand Down Expand Up @@ -69,7 +69,7 @@ public class ClusterManagerTaskThrottler implements TaskBatcherListener {
private final int MIN_THRESHOLD_VALUE = -1; // Disabled throttling
private final ClusterManagerTaskThrottlerListener clusterManagerTaskThrottlerListener;

private final ConcurrentMap<String, Long> tasksCount;
final ConcurrentMap<String, Long> tasksCount;
private final ConcurrentMap<String, Long> tasksThreshold;
private final Supplier<Version> minNodeVersionSupplier;

Expand Down Expand Up @@ -209,30 +209,59 @@ Long getThrottlingLimit(final String taskKey) {
return tasksThreshold.get(taskKey);
}

private void failFastWhenThrottlingThresholdsAreAlreadyBreached(
final boolean throttlingEnabledWithThreshold,
final Long threshold,
final long existingTaskCount,
final int incomingTaskCount,
final String taskThrottlingKey
) {
if (throttlingEnabledWithThreshold && shouldThrottle(threshold, existingTaskCount, incomingTaskCount)) {
throw new ClusterManagerThrottlingException("Throttling Exception : Limit exceeded for " + taskThrottlingKey);
}
}

@Override
public void onBeginSubmit(List<? extends TaskBatcher.BatchedTask> tasks) {
ThrottlingKey clusterManagerThrottlingKey = ((ClusterStateTaskExecutor<Object>) tasks.get(0).batchingKey)
final ThrottlingKey clusterManagerThrottlingKey = ((ClusterStateTaskExecutor<Object>) tasks.get(0).batchingKey)
.getClusterManagerThrottlingKey();
tasksCount.putIfAbsent(clusterManagerThrottlingKey.getTaskThrottlingKey(), 0L);
tasksCount.computeIfPresent(clusterManagerThrottlingKey.getTaskThrottlingKey(), (key, count) -> {
int size = tasks.size();
if (clusterManagerThrottlingKey.isThrottlingEnabled()) {
Long threshold = tasksThreshold.get(clusterManagerThrottlingKey.getTaskThrottlingKey());
if (threshold != null && shouldThrottle(threshold, count, size)) {
clusterManagerTaskThrottlerListener.onThrottle(clusterManagerThrottlingKey.getTaskThrottlingKey(), size);
logger.warn(
"Throwing Throttling Exception for [{}]. Trying to add [{}] tasks to queue, limit is set to [{}]",
clusterManagerThrottlingKey.getTaskThrottlingKey(),
tasks.size(),
threshold
);
throw new ClusterManagerThrottlingException(
"Throttling Exception : Limit exceeded for " + clusterManagerThrottlingKey.getTaskThrottlingKey()
);
}
}
return count + size;
});
final String taskThrottlingKey = clusterManagerThrottlingKey.getTaskThrottlingKey();
final Long threshold = getThrottlingLimit(taskThrottlingKey);
final boolean isThrottlingEnabledWithThreshold = clusterManagerThrottlingKey.isThrottlingEnabled() && threshold != null;
int incomingTaskCount = tasks.size();

try {
tasksCount.putIfAbsent(taskThrottlingKey, 0L);
// Perform shallow check before acquiring lock to avoid blocking of network threads
// if throttling is ongoing for a specific task
failFastWhenThrottlingThresholdsAreAlreadyBreached(
isThrottlingEnabledWithThreshold,
threshold,
tasksCount.get(taskThrottlingKey),
incomingTaskCount,
taskThrottlingKey
);

tasksCount.computeIfPresent(taskThrottlingKey, (key, existingTaskCount) -> {
failFastWhenThrottlingThresholdsAreAlreadyBreached(
isThrottlingEnabledWithThreshold,
threshold,
existingTaskCount,
incomingTaskCount,
taskThrottlingKey
);
return existingTaskCount + incomingTaskCount;
});
} catch (final ClusterManagerThrottlingException e) {
clusterManagerTaskThrottlerListener.onThrottle(taskThrottlingKey, incomingTaskCount);
logger.trace(
"Throwing Throttling Exception for [{}]. Trying to add [{}] tasks to queue, limit is set to [{}]",
taskThrottlingKey,
incomingTaskCount,
threshold
);
throw e;
}
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,10 @@ public ClusterManagerThrottlingException(String msg, Object... args) {
public ClusterManagerThrottlingException(StreamInput in) throws IOException {
super(in);
}

@Override
public Throwable fillInStackTrace() {
// This is on the hot path; stack traces are expensive to compute and not very useful for this exception, so don't fill it.
return this;
}
}
Loading

0 comments on commit cd77e41

Please sign in to comment.