From 59302a3d5ea255be7f2bb72187b8df1f0aa33572 Mon Sep 17 00:00:00 2001 From: Mohit Godwani <81609427+mgodwan@users.noreply.github.com> Date: Fri, 26 Jul 2024 18:37:24 +0530 Subject: [PATCH 1/6] Fix version check after backport (#14985) Signed-off-by: Mohit Godwani --- .../opensearch/cluster/metadata/ComposableIndexTemplate.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/opensearch/cluster/metadata/ComposableIndexTemplate.java b/server/src/main/java/org/opensearch/cluster/metadata/ComposableIndexTemplate.java index 594dda83c41e2..63bbe4144c4fb 100644 --- a/server/src/main/java/org/opensearch/cluster/metadata/ComposableIndexTemplate.java +++ b/server/src/main/java/org/opensearch/cluster/metadata/ComposableIndexTemplate.java @@ -184,7 +184,7 @@ public ComposableIndexTemplate(StreamInput in) throws IOException { this.version = in.readOptionalVLong(); this.metadata = in.readMap(); this.dataStreamTemplate = in.readOptionalWriteable(DataStreamTemplate::new); - if (in.getVersion().onOrAfter(Version.V_3_0_0)) { + if (in.getVersion().onOrAfter(Version.V_2_16_0)) { this.context = in.readOptionalWriteable(Context::new); } else { this.context = null; @@ -248,7 +248,7 @@ public void writeTo(StreamOutput out) throws IOException { out.writeOptionalVLong(this.version); out.writeMap(this.metadata); out.writeOptionalWriteable(dataStreamTemplate); - if (out.getVersion().onOrAfter(Version.V_3_0_0)) { + if (out.getVersion().onOrAfter(Version.V_2_16_0)) { out.writeOptionalWriteable(context); } } From d08c4253e18981d688253d20fb967e614923a957 Mon Sep 17 00:00:00 2001 From: Rahul Karajgikar <50844303+rahulkarajgikar@users.noreply.github.com> Date: Mon, 29 Jul 2024 18:18:44 +0530 Subject: [PATCH 2/6] [Batch Fetch] Fix for hasInitiatedFetching to fix allocation explain and manual reroute APIs (#14972) * Fix for hasInitiatedFetching() in batch mode Signed-off-by: Rahul Karajgikar --- CHANGELOG.md | 1 + .../gateway/RecoveryFromGatewayIT.java | 160 +++++++++++++++++- .../gateway/AsyncShardBatchFetch.java | 8 + .../gateway/ReplicaShardBatchAllocator.java | 2 +- .../gateway/ShardsBatchGatewayAllocator.java | 31 +++- 5 files changed, 191 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e88a084f7d7f6..d4c8c955bced4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ## [Unreleased 2.x] ### Added +- Fix for hasInitiatedFetching to fix allocation explain and manual reroute APIs (([#14972](https://github.com/opensearch-project/OpenSearch/pull/14972)) ### Dependencies - Bump `org.apache.commons:commons-lang3` from 3.14.0 to 3.15.0 ([#14861](https://github.com/opensearch-project/OpenSearch/pull/14861)) diff --git a/server/src/internalClusterTest/java/org/opensearch/gateway/RecoveryFromGatewayIT.java b/server/src/internalClusterTest/java/org/opensearch/gateway/RecoveryFromGatewayIT.java index 4085cc3890f30..eccc903dfac82 100644 --- a/server/src/internalClusterTest/java/org/opensearch/gateway/RecoveryFromGatewayIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/gateway/RecoveryFromGatewayIT.java @@ -57,6 +57,7 @@ import org.opensearch.cluster.routing.ShardRouting; import org.opensearch.cluster.routing.ShardRoutingState; import org.opensearch.cluster.routing.UnassignedInfo; +import org.opensearch.cluster.routing.allocation.AllocateUnassignedDecision; import org.opensearch.cluster.routing.allocation.AllocationDecision; import org.opensearch.cluster.routing.allocation.ExistingShardsAllocator; import org.opensearch.cluster.service.ClusterService; @@ -797,11 +798,26 @@ public void testBatchModeEnabledWithoutTimeout() throws Exception { ); assertTrue(ExistingShardsAllocator.EXISTING_SHARDS_ALLOCATOR_BATCH_MODE.get(internalCluster().clusterService().getSettings())); assertEquals(1, gatewayAllocator.getNumberOfStartedShardBatches()); - assertEquals(1, gatewayAllocator.getNumberOfStoreShardBatches()); + // Replica shard would be marked ineligible since there are no data nodes. + // It would then be removed from any batch and batches would get deleted, so we would have 0 replica batches + assertEquals(0, gatewayAllocator.getNumberOfStoreShardBatches()); - // Now start both data nodes and ensure batch mode is working - logger.info("--> restarting the stopped nodes"); + // Now start one data node + logger.info("--> restarting the first stopped node"); internalCluster().startDataOnlyNode(Settings.builder().put("node.name", dataOnlyNodes.get(0)).put(node0DataPathSettings).build()); + ensureStableCluster(2); + ensureYellow("test"); + assertEquals(0, gatewayAllocator.getNumberOfStartedShardBatches()); + assertEquals(0, gatewayAllocator.getNumberOfStoreShardBatches()); + assertEquals(0, gatewayAllocator.getNumberOfInFlightFetches()); + + // calling reroute and asserting on reroute response + logger.info("--> calling reroute while cluster is yellow"); + clusterRerouteResponse = client().admin().cluster().prepareReroute().setRetryFailed(true).get(); + assertTrue(clusterRerouteResponse.isAcknowledged()); + + // Now start last data node and ensure batch mode is working and cluster goes green + logger.info("--> restarting the second stopped node"); internalCluster().startDataOnlyNode(Settings.builder().put("node.name", dataOnlyNodes.get(1)).put(node1DataPathSettings).build()); ensureStableCluster(3); ensureGreen("test"); @@ -842,11 +858,26 @@ public void testBatchModeEnabledWithSufficientTimeoutAndClusterGreen() throws Ex ); assertTrue(ExistingShardsAllocator.EXISTING_SHARDS_ALLOCATOR_BATCH_MODE.get(internalCluster().clusterService().getSettings())); assertEquals(1, gatewayAllocator.getNumberOfStartedShardBatches()); - assertEquals(1, gatewayAllocator.getNumberOfStoreShardBatches()); + // Replica shard would be marked ineligible since there are no data nodes. + // It would then be removed from any batch and batches would get deleted, so we would have 0 replica batches + assertEquals(0, gatewayAllocator.getNumberOfStoreShardBatches()); - // Now start both data nodes and ensure batch mode is working - logger.info("--> restarting the stopped nodes"); + // Now start one data nodes and ensure batch mode is working + logger.info("--> restarting the first stopped node"); internalCluster().startDataOnlyNode(Settings.builder().put("node.name", dataOnlyNodes.get(0)).put(node0DataPathSettings).build()); + ensureStableCluster(2); + ensureYellow("test"); + assertEquals(0, gatewayAllocator.getNumberOfStartedShardBatches()); + assertEquals(0, gatewayAllocator.getNumberOfStoreShardBatches()); + assertEquals(0, gatewayAllocator.getNumberOfInFlightFetches()); + + // calling reroute and asserting on reroute response + logger.info("--> calling reroute while cluster is yellow"); + clusterRerouteResponse = client().admin().cluster().prepareReroute().setRetryFailed(true).get(); + assertTrue(clusterRerouteResponse.isAcknowledged()); + + // Now start last data node and ensure batch mode is working and cluster goes green + logger.info("--> restarting the second stopped node"); internalCluster().startDataOnlyNode(Settings.builder().put("node.name", dataOnlyNodes.get(1)).put(node1DataPathSettings).build()); ensureStableCluster(3); ensureGreen("test"); @@ -907,7 +938,9 @@ public void testBatchModeEnabledWithInSufficientTimeoutButClusterGreen() throws assertTrue(ExistingShardsAllocator.EXISTING_SHARDS_ALLOCATOR_BATCH_MODE.get(internalCluster().clusterService().getSettings())); assertEquals(10, gatewayAllocator.getNumberOfStartedShardBatches()); - assertEquals(10, gatewayAllocator.getNumberOfStoreShardBatches()); + // All replica shards would be marked ineligible since there are no data nodes. + // They would then be removed from any batch and batches would get deleted, so we would have 0 replica batches + assertEquals(0, gatewayAllocator.getNumberOfStoreShardBatches()); health = client(internalCluster().getClusterManagerName()).admin().cluster().health(Requests.clusterHealthRequest()).actionGet(); assertFalse(health.isTimedOut()); assertEquals(RED, health.getStatus()); @@ -1051,6 +1084,18 @@ public void testMultipleReplicaShardAssignmentWithDelayedAllocationAndDifferentN ensureGreen("test"); } + public void testAllocationExplainReturnsNoWhenExtraReplicaShardInNonBatchMode() throws Exception { + // Non batch mode - This test is to validate that we don't return AWAITING_INFO in allocation explain API when the deciders are + // returning NO + this.allocationExplainReturnsNoWhenExtraReplicaShard(false); + } + + public void testAllocationExplainReturnsNoWhenExtraReplicaShardInBatchMode() throws Exception { + // Batch mode - This test is to validate that we don't return AWAITING_INFO in allocation explain API when the deciders are + // returning NO + this.allocationExplainReturnsNoWhenExtraReplicaShard(true); + } + public void testNBatchesCreationAndAssignment() throws Exception { // we will reduce batch size to 5 to make sure we have enough batches to test assignment // Total number of primary shards = 50 (50 indices*1) @@ -1104,7 +1149,9 @@ public void testNBatchesCreationAndAssignment() throws Exception { ); assertTrue(ExistingShardsAllocator.EXISTING_SHARDS_ALLOCATOR_BATCH_MODE.get(internalCluster().clusterService().getSettings())); assertEquals(10, gatewayAllocator.getNumberOfStartedShardBatches()); - assertEquals(10, gatewayAllocator.getNumberOfStoreShardBatches()); + // All replica shards would be marked ineligible since there are no data nodes. + // They would then be removed from any batch and batches would get deleted, so we would have 0 replica batches + assertEquals(0, gatewayAllocator.getNumberOfStoreShardBatches()); health = client(internalCluster().getClusterManagerName()).admin().cluster().health(Requests.clusterHealthRequest()).actionGet(); assertFalse(health.isTimedOut()); assertEquals(RED, health.getStatus()); @@ -1193,7 +1240,9 @@ public void testCulpritShardInBatch() throws Exception { ); assertTrue(ExistingShardsAllocator.EXISTING_SHARDS_ALLOCATOR_BATCH_MODE.get(internalCluster().clusterService().getSettings())); assertEquals(1, gatewayAllocator.getNumberOfStartedShardBatches()); - assertEquals(1, gatewayAllocator.getNumberOfStoreShardBatches()); + // Replica shard would be marked ineligible since there are no data nodes. + // It would then be removed from any batch and batches would get deleted, so we would have 0 replica batches + assertEquals(0, gatewayAllocator.getNumberOfStoreShardBatches()); assertTrue(clusterRerouteResponse.isAcknowledged()); health = client(internalCluster().getClusterManagerName()).admin().cluster().health(Requests.clusterHealthRequest()).actionGet(); assertFalse(health.isTimedOut()); @@ -1511,4 +1560,97 @@ private List findNodesWithShard(final boolean primary) { Collections.shuffle(requiredStartedShards, random()); return requiredStartedShards.stream().map(shard -> state.nodes().get(shard.currentNodeId()).getName()).collect(Collectors.toList()); } + + private void allocationExplainReturnsNoWhenExtraReplicaShard(boolean batchModeEnabled) throws Exception { + internalCluster().startClusterManagerOnlyNodes( + 1, + Settings.builder().put(ExistingShardsAllocator.EXISTING_SHARDS_ALLOCATOR_BATCH_MODE.getKey(), batchModeEnabled).build() + ); + internalCluster().startDataOnlyNodes(5); + createIndex( + "test", + Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1).put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 4).build() + ); + ensureGreen("test"); + ensureStableCluster(6); + + // Stop one of the nodes to make the cluster yellow + // We cannot directly create an index with replica = data node count because then the whole flow will get skipped due to + // INDEX_CREATED + List nodesWithReplicaShards = findNodesWithShard(false); + Settings replicaNodeDataPathSettings = internalCluster().dataPathSettings(nodesWithReplicaShards.get(0)); + internalCluster().stopRandomNode(InternalTestCluster.nameFilter(nodesWithReplicaShards.get(0))); + + ensureStableCluster(5); + ensureYellow("test"); + + logger.info("--> calling allocation explain API"); + // shard should have decision NO because there is no valid node for the extra replica to go to + AllocateUnassignedDecision aud = client().admin() + .cluster() + .prepareAllocationExplain() + .setIndex("test") + .setShard(0) + .setPrimary(false) + .get() + .getExplanation() + .getShardAllocationDecision() + .getAllocateDecision(); + + assertEquals(AllocationDecision.NO, aud.getAllocationDecision()); + assertEquals("cannot allocate because allocation is not permitted to any of the nodes", aud.getExplanation()); + + // Now creating a new index with too many replicas and trying again + createIndex( + "test2", + Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1).put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 5).build() + ); + + ensureYellowAndNoInitializingShards("test2"); + + logger.info("--> calling allocation explain API again"); + // shard should have decision NO because there are 6 replicas and 4 data nodes + aud = client().admin() + .cluster() + .prepareAllocationExplain() + .setIndex("test2") + .setShard(0) + .setPrimary(false) + .get() + .getExplanation() + .getShardAllocationDecision() + .getAllocateDecision(); + + assertEquals(AllocationDecision.NO, aud.getAllocationDecision()); + assertEquals("cannot allocate because allocation is not permitted to any of the nodes", aud.getExplanation()); + + logger.info("--> restarting the stopped node"); + internalCluster().startDataOnlyNode( + Settings.builder().put("node.name", nodesWithReplicaShards.get(0)).put(replicaNodeDataPathSettings).build() + ); + + ensureStableCluster(6); + ensureGreen("test"); + + logger.info("--> calling allocation explain API 3rd time"); + // shard should still have decision NO because there are 6 replicas and 5 data nodes + aud = client().admin() + .cluster() + .prepareAllocationExplain() + .setIndex("test2") + .setShard(0) + .setPrimary(false) + .get() + .getExplanation() + .getShardAllocationDecision() + .getAllocateDecision(); + + assertEquals(AllocationDecision.NO, aud.getAllocationDecision()); + assertEquals("cannot allocate because allocation is not permitted to any of the nodes", aud.getExplanation()); + + internalCluster().startDataOnlyNodes(1); + + ensureStableCluster(7); + ensureGreen("test2"); + } } diff --git a/server/src/main/java/org/opensearch/gateway/AsyncShardBatchFetch.java b/server/src/main/java/org/opensearch/gateway/AsyncShardBatchFetch.java index 4f39a39cea678..df642a9f5a743 100644 --- a/server/src/main/java/org/opensearch/gateway/AsyncShardBatchFetch.java +++ b/server/src/main/java/org/opensearch/gateway/AsyncShardBatchFetch.java @@ -80,6 +80,14 @@ public synchronized void clearShard(ShardId shardId) { this.cache.deleteShard(shardId); } + public boolean hasEmptyCache() { + return this.cache.getCache().isEmpty(); + } + + public AsyncShardFetchCache getCache() { + return this.cache; + } + /** * Cache implementation of transport actions returning batch of shards related data in the response. * Store node level responses of transport actions like {@link TransportNodesListGatewayStartedShardsBatch} or diff --git a/server/src/main/java/org/opensearch/gateway/ReplicaShardBatchAllocator.java b/server/src/main/java/org/opensearch/gateway/ReplicaShardBatchAllocator.java index 7c75f2a5d1a8f..0818b187271cb 100644 --- a/server/src/main/java/org/opensearch/gateway/ReplicaShardBatchAllocator.java +++ b/server/src/main/java/org/opensearch/gateway/ReplicaShardBatchAllocator.java @@ -183,7 +183,7 @@ private AllocateUnassignedDecision getUnassignedShardAllocationDecision( if (allocationDecision.type() != Decision.Type.YES && (!explain || !hasInitiatedFetching(shardRouting))) { // only return early if we are not in explain mode, or we are in explain mode but we have not // yet attempted to fetch any shard data - logger.trace("{}: ignoring allocation, can't be allocated on any node", shardRouting); + logger.trace("{}: ignoring allocation, can't be allocated on any node. Decision: {}", shardRouting, allocationDecision.type()); return AllocateUnassignedDecision.no( UnassignedInfo.AllocationStatus.fromDecision(allocationDecision.type()), result.v2() != null ? new ArrayList<>(result.v2().values()) : null diff --git a/server/src/main/java/org/opensearch/gateway/ShardsBatchGatewayAllocator.java b/server/src/main/java/org/opensearch/gateway/ShardsBatchGatewayAllocator.java index 55f5388d8f454..673ed8dbaa1c3 100644 --- a/server/src/main/java/org/opensearch/gateway/ShardsBatchGatewayAllocator.java +++ b/server/src/main/java/org/opensearch/gateway/ShardsBatchGatewayAllocator.java @@ -576,8 +576,37 @@ protected AsyncShardFetch.FetchResult Date: Mon, 29 Jul 2024 10:26:49 -0400 Subject: [PATCH 3/6] Bump com.microsoft.azure:msal4j from 1.16.1 to 1.16.2 in /plugins/repository-azure (#14995) * Bump com.microsoft.azure:msal4j in /plugins/repository-azure Bumps [com.microsoft.azure:msal4j](https://github.com/AzureAD/microsoft-authentication-library-for-java) from 1.16.1 to 1.16.2. - [Release notes](https://github.com/AzureAD/microsoft-authentication-library-for-java/releases) - [Changelog](https://github.com/AzureAD/microsoft-authentication-library-for-java/blob/dev/changelog.txt) - [Commits](https://github.com/AzureAD/microsoft-authentication-library-for-java/compare/v1.16.1...v1.16.2) --- updated-dependencies: - dependency-name: com.microsoft.azure:msal4j dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] * Updating SHAs Signed-off-by: dependabot[bot] * Update changelog Signed-off-by: dependabot[bot] --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: dependabot[bot] --- CHANGELOG.md | 1 + plugins/repository-azure/build.gradle | 2 +- plugins/repository-azure/licenses/msal4j-1.16.1.jar.sha1 | 1 - plugins/repository-azure/licenses/msal4j-1.16.2.jar.sha1 | 1 + 4 files changed, 3 insertions(+), 2 deletions(-) delete mode 100644 plugins/repository-azure/licenses/msal4j-1.16.1.jar.sha1 create mode 100644 plugins/repository-azure/licenses/msal4j-1.16.2.jar.sha1 diff --git a/CHANGELOG.md b/CHANGELOG.md index d4c8c955bced4..138efce1c29e7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Dependencies - Bump `org.apache.commons:commons-lang3` from 3.14.0 to 3.15.0 ([#14861](https://github.com/opensearch-project/OpenSearch/pull/14861)) +- Bump `com.microsoft.azure:msal4j` from 1.16.1 to 1.16.2 ([#14995](https://github.com/opensearch-project/OpenSearch/pull/14995)) ### Changed diff --git a/plugins/repository-azure/build.gradle b/plugins/repository-azure/build.gradle index 7bd7be1481a2f..15e3158f2dbc4 100644 --- a/plugins/repository-azure/build.gradle +++ b/plugins/repository-azure/build.gradle @@ -61,7 +61,7 @@ dependencies { // Start of transitive dependencies for azure-identity api 'com.microsoft.azure:msal4j-persistence-extension:1.3.0' api "net.java.dev.jna:jna-platform:${versions.jna}" - api 'com.microsoft.azure:msal4j:1.16.1' + api 'com.microsoft.azure:msal4j:1.16.2' api 'com.nimbusds:oauth2-oidc-sdk:11.9.1' api 'com.nimbusds:nimbus-jose-jwt:9.40' api 'com.nimbusds:content-type:2.3' diff --git a/plugins/repository-azure/licenses/msal4j-1.16.1.jar.sha1 b/plugins/repository-azure/licenses/msal4j-1.16.1.jar.sha1 deleted file mode 100644 index 7d24922196be4..0000000000000 --- a/plugins/repository-azure/licenses/msal4j-1.16.1.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -4ad89b4632ef9abab883114e77c079843a206862 \ No newline at end of file diff --git a/plugins/repository-azure/licenses/msal4j-1.16.2.jar.sha1 b/plugins/repository-azure/licenses/msal4j-1.16.2.jar.sha1 new file mode 100644 index 0000000000000..1363e5a0793d2 --- /dev/null +++ b/plugins/repository-azure/licenses/msal4j-1.16.2.jar.sha1 @@ -0,0 +1 @@ +b43ec4dd657f8ed5922bc0a8ccbe49000968bd15 \ No newline at end of file From 122f3f0ab7448f06a20b46919c6e23e74ce1fa9c Mon Sep 17 00:00:00 2001 From: Rishab Nahata Date: Mon, 29 Jul 2024 20:20:26 +0530 Subject: [PATCH 4/6] Cache index shard limit to optimise ShardLimitsAllocationDecider (#14962) * Cache index shard limit per node Signed-off-by: Rishab Nahata --- .../routing/allocation/RerouteBenchmark.java | 135 ++++++++++++++++++ .../cluster/metadata/IndexMetadata.java | 16 ++- .../decider/ShardsLimitAllocationDecider.java | 4 +- 3 files changed, 150 insertions(+), 5 deletions(-) create mode 100644 benchmarks/src/main/java/org/opensearch/benchmark/routing/allocation/RerouteBenchmark.java diff --git a/benchmarks/src/main/java/org/opensearch/benchmark/routing/allocation/RerouteBenchmark.java b/benchmarks/src/main/java/org/opensearch/benchmark/routing/allocation/RerouteBenchmark.java new file mode 100644 index 0000000000000..e54bca579423b --- /dev/null +++ b/benchmarks/src/main/java/org/opensearch/benchmark/routing/allocation/RerouteBenchmark.java @@ -0,0 +1,135 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.benchmark.routing.allocation; + +import org.opensearch.Version; +import org.opensearch.cluster.ClusterName; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.metadata.Metadata; +import org.opensearch.cluster.node.DiscoveryNodes; +import org.opensearch.cluster.routing.RoutingTable; +import org.opensearch.cluster.routing.ShardRouting; +import org.opensearch.cluster.routing.allocation.AllocationService; +import org.opensearch.common.logging.LogConfigurator; +import org.opensearch.common.settings.Settings; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Warmup; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.TimeUnit; + +import static org.opensearch.cluster.routing.ShardRoutingState.INITIALIZING; + +@Fork(1) +@Warmup(iterations = 3) +@Measurement(iterations = 3) +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(TimeUnit.MILLISECONDS) +@State(Scope.Benchmark) +@SuppressWarnings("unused") // invoked by benchmarking framework +public class RerouteBenchmark { + @Param({ + // indices| nodes + " 10000| 500|", }) + public String indicesNodes = "1|1"; + public int numIndices; + public int numNodes; + public int numShards = 10; + public int numReplicas = 1; + + private AllocationService allocationService; + private ClusterState initialClusterState; + + @Setup + public void setUp() throws Exception { + LogConfigurator.setNodeName("test"); + final String[] params = indicesNodes.split("\\|"); + numIndices = toInt(params[0]); + numNodes = toInt(params[1]); + + int totalShardCount = (numReplicas + 1) * numShards * numIndices; + Metadata.Builder mb = Metadata.builder(); + for (int i = 1; i <= numIndices; i++) { + mb.put( + IndexMetadata.builder("test_" + i) + .settings(Settings.builder().put("index.version.created", Version.CURRENT)) + .numberOfShards(numShards) + .numberOfReplicas(numReplicas) + ); + } + + Metadata metadata = mb.build(); + RoutingTable.Builder rb = RoutingTable.builder(); + for (int i = 1; i <= numIndices; i++) { + rb.addAsNew(metadata.index("test_" + i)); + } + RoutingTable routingTable = rb.build(); + initialClusterState = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)) + .metadata(metadata) + .routingTable(routingTable) + .nodes(setUpClusterNodes(numNodes)) + .build(); + } + + @Benchmark + public ClusterState measureShardAllocationEmptyCluster() throws Exception { + ClusterState clusterState = initialClusterState; + allocationService = Allocators.createAllocationService( + Settings.builder() + .put("cluster.routing.allocation.awareness.attributes", "zone") + .put("cluster.routing.allocation.load_awareness.provisioned_capacity", numNodes) + .put("cluster.routing.allocation.load_awareness.skew_factor", "50") + .put("cluster.routing.allocation.node_concurrent_recoveries", "2") + .build() + ); + clusterState = allocationService.reroute(clusterState, "reroute"); + while (clusterState.getRoutingNodes().hasUnassignedShards()) { + clusterState = startInitializingShardsAndReroute(allocationService, clusterState); + } + return clusterState; + } + + private int toInt(String v) { + return Integer.valueOf(v.trim()); + } + + private DiscoveryNodes.Builder setUpClusterNodes(int nodes) { + DiscoveryNodes.Builder nb = DiscoveryNodes.builder(); + for (int i = 1; i <= nodes; i++) { + Map attributes = new HashMap<>(); + attributes.put("zone", "zone_" + (i % 3)); + nb.add(Allocators.newNode("node_0_" + i, attributes)); + } + return nb; + } + + private static ClusterState startInitializingShardsAndReroute(AllocationService allocationService, ClusterState clusterState) { + return startShardsAndReroute(allocationService, clusterState, clusterState.routingTable().shardsWithState(INITIALIZING)); + } + + private static ClusterState startShardsAndReroute( + AllocationService allocationService, + ClusterState clusterState, + List initializingShards + ) { + return allocationService.reroute(allocationService.applyStartedShards(clusterState, initializingShards), "reroute after starting"); + } +} diff --git a/server/src/main/java/org/opensearch/cluster/metadata/IndexMetadata.java b/server/src/main/java/org/opensearch/cluster/metadata/IndexMetadata.java index 9e7fe23f29872..df0d2609ad83d 100644 --- a/server/src/main/java/org/opensearch/cluster/metadata/IndexMetadata.java +++ b/server/src/main/java/org/opensearch/cluster/metadata/IndexMetadata.java @@ -43,6 +43,7 @@ import org.opensearch.cluster.block.ClusterBlockLevel; import org.opensearch.cluster.node.DiscoveryNodeFilters; import org.opensearch.cluster.routing.allocation.IndexMetadataUpdater; +import org.opensearch.cluster.routing.allocation.decider.ShardsLimitAllocationDecider; import org.opensearch.common.Nullable; import org.opensearch.common.annotation.PublicApi; import org.opensearch.common.collect.MapBuilder; @@ -686,6 +687,8 @@ public static APIBlock readFrom(StreamInput input) throws IOException { private final boolean isSystem; private final boolean isRemoteSnapshot; + private final int indexTotalShardsPerNodeLimit; + private IndexMetadata( final Index index, final long version, @@ -711,7 +714,8 @@ private IndexMetadata( final int routingPartitionSize, final ActiveShardCount waitForActiveShards, final Map rolloverInfos, - final boolean isSystem + final boolean isSystem, + final int indexTotalShardsPerNodeLimit ) { this.index = index; @@ -746,6 +750,7 @@ private IndexMetadata( this.rolloverInfos = Collections.unmodifiableMap(rolloverInfos); this.isSystem = isSystem; this.isRemoteSnapshot = IndexModule.Type.REMOTE_SNAPSHOT.match(this.settings); + this.indexTotalShardsPerNodeLimit = indexTotalShardsPerNodeLimit; assert numberOfShards * routingFactor == routingNumShards : routingNumShards + " must be a multiple of " + numberOfShards; } @@ -899,6 +904,10 @@ public Set inSyncAllocationIds(int shardId) { return inSyncAllocationIds.get(shardId); } + public int getIndexTotalShardsPerNodeLimit() { + return this.indexTotalShardsPerNodeLimit; + } + @Nullable public DiscoveryNodeFilters requireFilters() { return requireFilters; @@ -1583,6 +1592,8 @@ public IndexMetadata build() { ); } + final int indexTotalShardsPerNodeLimit = ShardsLimitAllocationDecider.INDEX_TOTAL_SHARDS_PER_NODE_SETTING.get(settings); + final String uuid = settings.get(SETTING_INDEX_UUID, INDEX_UUID_NA_VALUE); return new IndexMetadata( @@ -1610,7 +1621,8 @@ public IndexMetadata build() { routingPartitionSize, waitForActiveShards, rolloverInfos, - isSystem + isSystem, + indexTotalShardsPerNodeLimit ); } diff --git a/server/src/main/java/org/opensearch/cluster/routing/allocation/decider/ShardsLimitAllocationDecider.java b/server/src/main/java/org/opensearch/cluster/routing/allocation/decider/ShardsLimitAllocationDecider.java index c008102554e8c..6f211f370de95 100644 --- a/server/src/main/java/org/opensearch/cluster/routing/allocation/decider/ShardsLimitAllocationDecider.java +++ b/server/src/main/java/org/opensearch/cluster/routing/allocation/decider/ShardsLimitAllocationDecider.java @@ -32,7 +32,6 @@ package org.opensearch.cluster.routing.allocation.decider; -import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.routing.RoutingNode; import org.opensearch.cluster.routing.ShardRouting; import org.opensearch.cluster.routing.ShardRoutingState; @@ -125,8 +124,7 @@ private Decision doDecide( RoutingAllocation allocation, BiPredicate decider ) { - IndexMetadata indexMd = allocation.metadata().getIndexSafe(shardRouting.index()); - final int indexShardLimit = INDEX_TOTAL_SHARDS_PER_NODE_SETTING.get(indexMd.getSettings(), settings); + final int indexShardLimit = allocation.metadata().getIndexSafe(shardRouting.index()).getIndexTotalShardsPerNodeLimit(); // Capture the limit here in case it changes during this method's // execution final int clusterShardLimit = this.clusterShardLimit; From 691f78ca480588df3d27dcad96601b22e77b6386 Mon Sep 17 00:00:00 2001 From: Andriy Redko Date: Mon, 29 Jul 2024 11:50:29 -0400 Subject: [PATCH 5/6] OpenJDK Update (July 2024 Patch releases) (#14998) Signed-off-by: Andriy Redko --- CHANGELOG.md | 1 + .../java/org/opensearch/gradle/test/DistroTestPlugin.java | 4 ++-- buildSrc/version.properties | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 138efce1c29e7..1b21fded97e47 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Dependencies - Bump `org.apache.commons:commons-lang3` from 3.14.0 to 3.15.0 ([#14861](https://github.com/opensearch-project/OpenSearch/pull/14861)) +- OpenJDK Update (July 2024 Patch releases) ([#14998](https://github.com/opensearch-project/OpenSearch/pull/14998)) - Bump `com.microsoft.azure:msal4j` from 1.16.1 to 1.16.2 ([#14995](https://github.com/opensearch-project/OpenSearch/pull/14995)) ### Changed diff --git a/buildSrc/src/main/java/org/opensearch/gradle/test/DistroTestPlugin.java b/buildSrc/src/main/java/org/opensearch/gradle/test/DistroTestPlugin.java index b2b3e3003e572..8d5ce9143cbac 100644 --- a/buildSrc/src/main/java/org/opensearch/gradle/test/DistroTestPlugin.java +++ b/buildSrc/src/main/java/org/opensearch/gradle/test/DistroTestPlugin.java @@ -77,9 +77,9 @@ import java.util.stream.Stream; public class DistroTestPlugin implements Plugin { - private static final String SYSTEM_JDK_VERSION = "21.0.3+9"; + private static final String SYSTEM_JDK_VERSION = "21.0.4+7"; private static final String SYSTEM_JDK_VENDOR = "adoptium"; - private static final String GRADLE_JDK_VERSION = "21.0.3+9"; + private static final String GRADLE_JDK_VERSION = "21.0.4+7"; private static final String GRADLE_JDK_VENDOR = "adoptium"; // all distributions used by distro tests. this is temporary until tests are per distribution diff --git a/buildSrc/version.properties b/buildSrc/version.properties index 855ccc1f87413..7d32ed3df7b76 100644 --- a/buildSrc/version.properties +++ b/buildSrc/version.properties @@ -2,7 +2,7 @@ opensearch = 3.0.0 lucene = 9.12.0-snapshot-847316d bundled_jdk_vendor = adoptium -bundled_jdk = 21.0.3+9 +bundled_jdk = 21.0.4+7 # optional dependencies spatial4j = 0.7 From f5b0ebaab8632932faa362caffa412b7eb6eb23a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 29 Jul 2024 12:03:23 -0500 Subject: [PATCH 6/6] Bump actions/github-script from 6 to 7 (#14997) * Bump actions/github-script from 6 to 7 Bumps [actions/github-script](https://github.com/actions/github-script) from 6 to 7. - [Release notes](https://github.com/actions/github-script/releases) - [Commits](https://github.com/actions/github-script/compare/v6...v7) --- updated-dependencies: - dependency-name: actions/github-script dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] * Update changelog Signed-off-by: dependabot[bot] --------- Signed-off-by: dependabot[bot] Signed-off-by: Daniel (dB.) Doubrovkine Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: dependabot[bot] Co-authored-by: Daniel (dB.) Doubrovkine --- .github/workflows/add-performance-comment.yml | 2 +- .github/workflows/benchmark-pull-request.yml | 6 +++--- .github/workflows/maintainer-approval.yml | 2 +- .github/workflows/triage.yml | 2 +- .github/workflows/version.yml | 2 +- CHANGELOG.md | 1 + 6 files changed, 8 insertions(+), 7 deletions(-) diff --git a/.github/workflows/add-performance-comment.yml b/.github/workflows/add-performance-comment.yml index fc272714c5628..6a310bff4c0a1 100644 --- a/.github/workflows/add-performance-comment.yml +++ b/.github/workflows/add-performance-comment.yml @@ -16,7 +16,7 @@ jobs: steps: - name: Add comment to PR - uses: actions/github-script@v6 + uses: actions/github-script@v7 with: github-token: ${{secrets.GITHUB_TOKEN}} script: | diff --git a/.github/workflows/benchmark-pull-request.yml b/.github/workflows/benchmark-pull-request.yml index 47abcc1178572..98dd39b1dad54 100644 --- a/.github/workflows/benchmark-pull-request.yml +++ b/.github/workflows/benchmark-pull-request.yml @@ -25,7 +25,7 @@ jobs: echo "USER_TAGS=pull_request_number:${{ github.event.issue.number }},repository:OpenSearch" >> $GITHUB_ENV - name: Check comment format id: check_comment - uses: actions/github-script@v6 + uses: actions/github-script@v7 with: script: | const fs = require('fs'); @@ -62,7 +62,7 @@ jobs: } - name: Post invalid format comment if: steps.check_comment.outputs.invalid == 'true' - uses: actions/github-script@v6 + uses: actions/github-script@v7 with: github-token: ${{secrets.GITHUB_TOKEN}} script: | @@ -150,7 +150,7 @@ jobs: cat $GITHUB_ENV bash opensearch-build/scripts/benchmark/benchmark-pull-request.sh ${{ secrets.JENKINS_PR_BENCHMARK_GENERIC_WEBHOOK_TOKEN }} - name: Update PR with Job Url - uses: actions/github-script@v6 + uses: actions/github-script@v7 with: github-token: ${{ secrets.GITHUB_TOKEN }} script: | diff --git a/.github/workflows/maintainer-approval.yml b/.github/workflows/maintainer-approval.yml index fdc2bf16937b4..34e8f57cc1878 100644 --- a/.github/workflows/maintainer-approval.yml +++ b/.github/workflows/maintainer-approval.yml @@ -9,7 +9,7 @@ jobs: runs-on: ubuntu-latest steps: - id: find-maintainers - uses: actions/github-script@v7.0.1 + uses: actions/github-script@v7 with: github-token: ${{ secrets.GITHUB_TOKEN }} result-encoding: string diff --git a/.github/workflows/triage.yml b/.github/workflows/triage.yml index 83bf4926a8c2d..c305818bdb0a9 100644 --- a/.github/workflows/triage.yml +++ b/.github/workflows/triage.yml @@ -9,7 +9,7 @@ jobs: if: github.repository == 'opensearch-project/OpenSearch' runs-on: ubuntu-latest steps: - - uses: actions/github-script@v7.0.1 + - uses: actions/github-script@v7 with: script: | const { issue, repository } = context.payload; diff --git a/.github/workflows/version.yml b/.github/workflows/version.yml index 7f120b65d7c2e..2de54716256ff 100644 --- a/.github/workflows/version.yml +++ b/.github/workflows/version.yml @@ -129,7 +129,7 @@ jobs: - name: Create tracking issue id: create-issue - uses: actions/github-script@v7.0.1 + uses: actions/github-script@v7 with: script: | const body = ` diff --git a/CHANGELOG.md b/CHANGELOG.md index 1b21fded97e47..7cc918b2ac089 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Bump `org.apache.commons:commons-lang3` from 3.14.0 to 3.15.0 ([#14861](https://github.com/opensearch-project/OpenSearch/pull/14861)) - OpenJDK Update (July 2024 Patch releases) ([#14998](https://github.com/opensearch-project/OpenSearch/pull/14998)) - Bump `com.microsoft.azure:msal4j` from 1.16.1 to 1.16.2 ([#14995](https://github.com/opensearch-project/OpenSearch/pull/14995)) +- Bump `actions/github-script` from 6 to 7 ([#14997](https://github.com/opensearch-project/OpenSearch/pull/14997)) ### Changed