Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Backport 2.x] Fix valid cluster UUID logic for uncommitted cluster UUIDs #10944

Merged
merged 1 commit into from
Oct 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -872,25 +872,31 @@ private Map<String, ClusterMetadataManifest> getLatestManifestForAllClusterUUIDs
* @return List of cluster UUIDs. The first element is the most recent cluster UUID in the chain
*/
private List<String> createClusterChain(final Map<String, ClusterMetadataManifest> manifestsByClusterUUID, final String clusterName) {
final Map<String, String> clusterUUIDGraph = manifestsByClusterUUID.values()
final List<ClusterMetadataManifest> validClusterManifests = manifestsByClusterUUID.values()
.stream()
.filter(this::isValidClusterUUID)
.collect(Collectors.toList());
final Map<String, String> clusterUUIDGraph = validClusterManifests.stream()
.collect(Collectors.toMap(ClusterMetadataManifest::getClusterUUID, ClusterMetadataManifest::getPreviousClusterUUID));
final List<String> validClusterUUIDs = manifestsByClusterUUID.values()
.stream()
.filter(m -> !isInvalidClusterUUID(m) && !clusterUUIDGraph.containsValue(m.getClusterUUID()))
final List<String> topLevelClusterUUIDs = validClusterManifests.stream()
.map(ClusterMetadataManifest::getClusterUUID)
.filter(clusterUUID -> !clusterUUIDGraph.containsValue(clusterUUID))
.collect(Collectors.toList());
if (validClusterUUIDs.isEmpty()) {
logger.info("There is no valid previous cluster UUID");

if (topLevelClusterUUIDs.isEmpty()) {
// This can occur only when there are no valid cluster UUIDs
assert validClusterManifests.isEmpty() : "There are no top level cluster UUIDs even when there are valid cluster UUIDs";
logger.info("There is no valid previous cluster UUID. All cluster UUIDs evaluated are: {}", manifestsByClusterUUID.keySet());
return Collections.emptyList();
}
if (validClusterUUIDs.size() > 1) {
if (topLevelClusterUUIDs.size() > 1) {
logger.info("Top level cluster UUIDs: {}", topLevelClusterUUIDs);
// If the valid cluster UUIDs are more that 1, it means there was some race condition where
// more then 2 cluster manager nodes tried to become active cluster manager and published
// 2 cluster UUIDs which followed the same previous UUID.
final Map<String, ClusterMetadataManifest> manifestsByClusterUUIDTrimmed = trimClusterUUIDs(
manifestsByClusterUUID,
validClusterUUIDs,
topLevelClusterUUIDs,
clusterName
);
if (manifestsByClusterUUID.size() == manifestsByClusterUUIDTrimmed.size()) {
Expand All @@ -899,14 +905,14 @@ private List<String> createClusterChain(final Map<String, ClusterMetadataManifes
Locale.ROOT,
"The system has ended into multiple valid cluster states in the remote store. "
+ "Please check their latest manifest to decide which one you want to keep. Valid Cluster UUIDs: - %s",
validClusterUUIDs
topLevelClusterUUIDs
)
);
}
return createClusterChain(manifestsByClusterUUIDTrimmed, clusterName);
}
final List<String> validChain = new ArrayList<>();
String currentUUID = validClusterUUIDs.get(0);
String currentUUID = topLevelClusterUUIDs.get(0);
while (currentUUID != null && !ClusterState.UNKNOWN_UUID.equals(currentUUID)) {
validChain.add(currentUUID);
// Getting the previous cluster UUID of a cluster UUID from the clusterUUID Graph
Expand All @@ -932,11 +938,7 @@ private Map<String, ClusterMetadataManifest> trimClusterUUIDs(
// Here we compare the manifest of current UUID to that of previous UUID
// In case currentUUID's latest manifest is same as previous UUIDs latest manifest,
// that means it was restored from previousUUID and no IndexMetadata update was performed on it.
if (ClusterState.UNKNOWN_UUID.equals(currentManifest.getPreviousClusterUUID())) {
if (currentManifest.getIndices().isEmpty()) {
trimmedUUIDs.remove(clusterUUID);
}
} else {
if (!ClusterState.UNKNOWN_UUID.equals(currentManifest.getPreviousClusterUUID())) {
ClusterMetadataManifest previousManifest = trimmedUUIDs.get(currentManifest.getPreviousClusterUUID());
if (isMetadataEqual(currentManifest, previousManifest, clusterName)
&& isGlobalMetadataEqual(currentManifest, previousManifest, clusterName)) {
Expand Down Expand Up @@ -975,8 +977,8 @@ private boolean isGlobalMetadataEqual(ClusterMetadataManifest first, ClusterMeta
return Metadata.isGlobalResourcesMetadataEquals(firstGlobalMetadata, secondGlobalMetadata);
}

private boolean isInvalidClusterUUID(ClusterMetadataManifest manifest) {
return !manifest.isClusterUUIDCommitted();
private boolean isValidClusterUUID(ClusterMetadataManifest manifest) {
return manifest.isClusterUUIDCommitted();
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -911,7 +911,7 @@ public void testGetValidPreviousClusterUUIDWithMultipleChains() throws IOExcepti
"cluster-uuid3",
"cluster-uuid1"
);
mockObjectsForGettingPreviousClusterUUID(clusterUUIDsPointers, randomBoolean());
mockObjectsForGettingPreviousClusterUUID(clusterUUIDsPointers, randomBoolean(), Collections.emptyMap());

remoteClusterStateService.start();
String previousClusterUUID = remoteClusterStateService.getLastKnownUUIDFromRemote("test-cluster");
Expand All @@ -933,6 +933,23 @@ public void testGetValidPreviousClusterUUIDWithInvalidMultipleChains() throws IO
assertThrows(IllegalStateException.class, () -> remoteClusterStateService.getLastKnownUUIDFromRemote("test-cluster"));
}

public void testGetValidPreviousClusterUUIDWhenLastUUIDUncommitted() throws IOException {
Map<String, String> clusterUUIDsPointers = Map.of(
"cluster-uuid1",
ClusterState.UNKNOWN_UUID,
"cluster-uuid2",
"cluster-uuid1",
"cluster-uuid3",
"cluster-uuid2"
);
Map<String, Boolean> clusterUUIDCommitted = Map.of("cluster-uuid1", true, "cluster-uuid2", true, "cluster-uuid3", false);
mockObjectsForGettingPreviousClusterUUID(clusterUUIDsPointers, clusterUUIDCommitted);

remoteClusterStateService.start();
String previousClusterUUID = remoteClusterStateService.getLastKnownUUIDFromRemote("test-cluster");
assertThat(previousClusterUUID, equalTo("cluster-uuid2"));
}

public void testDeleteStaleClusterUUIDs() throws IOException {
final ClusterState clusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()).build();
ClusterMetadataManifest clusterMetadataManifest = ClusterMetadataManifest.builder()
Expand Down Expand Up @@ -1128,11 +1145,21 @@ public void testGlobalMetadataUploadWaitTimeSetting() {
}

private void mockObjectsForGettingPreviousClusterUUID(Map<String, String> clusterUUIDsPointers) throws IOException {
mockObjectsForGettingPreviousClusterUUID(clusterUUIDsPointers, false);
mockObjectsForGettingPreviousClusterUUID(clusterUUIDsPointers, false, Collections.emptyMap());
}

private void mockObjectsForGettingPreviousClusterUUID(Map<String, String> clusterUUIDsPointers, boolean differGlobalMetadata)
throws IOException {
private void mockObjectsForGettingPreviousClusterUUID(
Map<String, String> clusterUUIDsPointers,
Map<String, Boolean> clusterUUIDCommitted
) throws IOException {
mockObjectsForGettingPreviousClusterUUID(clusterUUIDsPointers, false, clusterUUIDCommitted);
}

private void mockObjectsForGettingPreviousClusterUUID(
Map<String, String> clusterUUIDsPointers,
boolean differGlobalMetadata,
Map<String, Boolean> clusterUUIDCommitted
) throws IOException {
final BlobPath blobPath = mock(BlobPath.class);
when((blobStoreRepository.basePath())).thenReturn(blobPath);
when(blobPath.add(anyString())).thenReturn(blobPath);
Expand All @@ -1155,7 +1182,8 @@ private void mockObjectsForGettingPreviousClusterUUID(Map<String, String> cluste
clusterUUIDsPointers.get("cluster-uuid1"),
randomAlphaOfLength(10),
uploadedIndexMetadataList1,
"test-metadata1"
"test-metadata1",
clusterUUIDCommitted.getOrDefault("cluster-uuid1", true)
);
Settings indexSettings = Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT).build();
IndexMetadata indexMetadata1 = IndexMetadata.builder("index1")
Expand Down Expand Up @@ -1184,7 +1212,8 @@ private void mockObjectsForGettingPreviousClusterUUID(Map<String, String> cluste
clusterUUIDsPointers.get("cluster-uuid2"),
randomAlphaOfLength(10),
uploadedIndexMetadataList2,
"test-metadata2"
"test-metadata2",
clusterUUIDCommitted.getOrDefault("cluster-uuid2", true)
);
IndexMetadata indexMetadata3 = IndexMetadata.builder("index1")
.settings(indexSettings)
Expand Down Expand Up @@ -1229,7 +1258,8 @@ private void mockObjectsForGettingPreviousClusterUUID(Map<String, String> cluste
clusterUUIDsPointers.get("cluster-uuid3"),
randomAlphaOfLength(10),
uploadedIndexMetadataList3,
"test-metadata3"
"test-metadata3",
clusterUUIDCommitted.getOrDefault("cluster-uuid3", true)
);
mockBlobContainerForGlobalMetadata(blobContainer3, clusterManifest3, metadata3);
mockBlobContainer(blobContainer3, clusterManifest3, indexMetadataMap3, ClusterMetadataManifest.CODEC_V1);
Expand Down Expand Up @@ -1257,7 +1287,8 @@ private ClusterMetadataManifest generateClusterMetadataManifest(
String previousClusterUUID,
String stateUUID,
List<UploadedIndexMetadata> uploadedIndexMetadata,
String globalMetadataFileName
String globalMetadataFileName,
Boolean isUUIDCommitted
) {
return ClusterMetadataManifest.builder()
.indices(uploadedIndexMetadata)
Expand All @@ -1269,7 +1300,7 @@ private ClusterMetadataManifest generateClusterMetadataManifest(
.opensearchVersion(VersionUtils.randomOpenSearchVersion(random()))
.previousClusterUUID(previousClusterUUID)
.committed(true)
.clusterUUIDCommitted(true)
.clusterUUIDCommitted(isUUIDCommitted)
.globalMetadataFileName(globalMetadataFileName)
.codecVersion(ClusterMetadataManifest.CODEC_V1)
.build();
Expand Down
Loading