From 3726ed0791818054f5f8219b8f5df345c534f21a Mon Sep 17 00:00:00 2001 From: xyuanlu Date: Mon, 24 Jun 2024 09:51:37 -0700 Subject: [PATCH] fix race condition --- .../leaderelection/LeaderElectionClient.java | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/meta-client/src/main/java/org/apache/helix/metaclient/recipes/leaderelection/LeaderElectionClient.java b/meta-client/src/main/java/org/apache/helix/metaclient/recipes/leaderelection/LeaderElectionClient.java index ae7d9c9fab..48a64a61d8 100644 --- a/meta-client/src/main/java/org/apache/helix/metaclient/recipes/leaderelection/LeaderElectionClient.java +++ b/meta-client/src/main/java/org/apache/helix/metaclient/recipes/leaderelection/LeaderElectionClient.java @@ -38,7 +38,6 @@ import org.apache.helix.metaclient.exception.MetaClientNoNodeException; import org.apache.helix.metaclient.exception.MetaClientNodeExistsException; import org.apache.helix.metaclient.factories.MetaClientConfig; -import org.apache.helix.metaclient.impl.zk.ZkMetaClient; import org.apache.helix.metaclient.impl.zk.factory.ZkMetaClientConfig; import org.apache.helix.metaclient.impl.zk.factory.ZkMetaClientFactory; import org.slf4j.Logger; @@ -167,6 +166,7 @@ private void createParticipantInfo(String leaderPath, LeaderInfo participantInfo // try to create participant info entry, assuming leader election group node is already there _metaClient.create(leaderPath + PARTICIPANTS_ENTRY_PARENT + _participant, participantInfo, MetaClientInterface.EntryMode.EPHEMERAL); + LOG.info("Participant {} joined leader group {}.", _participant, leaderPath); } catch (MetaClientNodeExistsException ex) { throw new ConcurrentModificationException("Already joined leader election group. ", ex); } catch (MetaClientNoNodeException ex) { @@ -261,6 +261,7 @@ private void relinquishLeaderHelper(String leaderPath, Boolean exitLeaderElectio // deleting ZNode. So that handler in ReElectListener won't recreate the leader node. if (exitLeaderElectionParticipantPool) { _leaderGroups.remove(leaderPath + LEADER_ENTRY_KEY); + LOG.info("Leaving leader election pool {}.", leaderPath); _metaClient.delete(leaderPath + PARTICIPANTS_ENTRY_PARENT + _participant); } // check if current participant is the leader @@ -272,12 +273,13 @@ private void relinquishLeaderHelper(String leaderPath, Boolean exitLeaderElectio List ops = Arrays.asList(Op.check(key, expectedVersion), Op.delete(key, expectedVersion)); //Execute transactional support on operations List opResults = _metaClient.transactionOP(ops); + LOG.info("Try relinquish leader {}.", leaderPath); if (opResults.get(0).getType() == ERRORRESULT) { if (isLeader(leaderPath)) { // Participant re-elected as leader. throw new ConcurrentModificationException("Concurrent operation, please retry"); } else { - LOG.info("Someone else is already leader"); + LOG.info("Someone else is already leader when relinquishing leadership for path {}.", leaderPath); } } } @@ -366,7 +368,7 @@ public void unsubscribeLeadershipChanges(String leaderPath, LeaderElectionListen @Override public void close() throws Exception { - + LOG.info("Closing leader election client."); _metaClient.unsubscribeConnectStateChanges(_connectStateListener); // exit all previous joined leader election groups @@ -406,9 +408,11 @@ class ConnectStateListener implements ConnectStateChangeListener { @Override public void handleConnectStateChanged(MetaClientInterface.ConnectState prevState, MetaClientInterface.ConnectState currentState) throws Exception { - if (prevState == MetaClientInterface.ConnectState.EXPIRED + LOG.info("Connect state changed from {} to {}", prevState, currentState); + if ((prevState == MetaClientInterface.ConnectState.EXPIRED || prevState == MetaClientInterface.ConnectState.CLOSED_BY_CLIENT) && currentState == MetaClientInterface.ConnectState.CONNECTED) { for (String leaderPath : _participantInfos.keySet()) { + LOG.info("Recreate participant node for leaderPath {}.", leaderPath); _metaClient.create(leaderPath + PARTICIPANTS_ENTRY_PARENT + _participant, _participantInfos.get(leaderPath), MetaClientInterface.EntryMode.EPHEMERAL); } @@ -431,6 +435,7 @@ private void touchLeaderNode() { if (tup.left.getLeaderName().equalsIgnoreCase(_participant)) { int expectedVersion = tup.right.getVersion(); try { + LOG.info("Try touch leader node for path {}", _leaderGroups); _metaClient.set(key, tup.left, expectedVersion); } catch (MetaClientNoNodeException ex) { LOG.info("leaderPath {} gone when retouch leader node.", key);