diff --git a/zookeeper-server/src/main/java/org/apache/zookeeper/ZooDefs.java b/zookeeper-server/src/main/java/org/apache/zookeeper/ZooDefs.java index b2a2388006c..df927f48b41 100644 --- a/zookeeper-server/src/main/java/org/apache/zookeeper/ZooDefs.java +++ b/zookeeper-server/src/main/java/org/apache/zookeeper/ZooDefs.java @@ -164,6 +164,7 @@ public interface AddWatchModes { @InterfaceAudience.Public public interface GetChildrenPaginated { long lastPageMinCzxid = -1L; + int lastPageCzxidOffset = -1; } public static final String[] opNames = {"notification", "create", "delete", "exists", "getData", "setData", "getACL", "setACL", "getChildren", "getChildren2", "getMaxChildren", "setMaxChildren", "ping", "reconfig", "getConfig"}; diff --git a/zookeeper-server/src/main/java/org/apache/zookeeper/ZooKeeper.java b/zookeeper-server/src/main/java/org/apache/zookeeper/ZooKeeper.java index 562274888f7..729ef3c73d8 100644 --- a/zookeeper-server/src/main/java/org/apache/zookeeper/ZooKeeper.java +++ b/zookeeper-server/src/main/java/org/apache/zookeeper/ZooKeeper.java @@ -2874,21 +2874,39 @@ public List getAllChildrenPaginated(String path, boolean watch) } Set childrenSet = new HashSet<>(childrenPaginated); + getChildrenRemainingPages(path, watcher, nextPage, childrenSet); + + return new ArrayList<>(childrenSet); + } + + private void getChildrenRemainingPages(String path, + Watcher watcher, + PaginationNextPage nextPage, + Set childrenSet) throws KeeperException, InterruptedException { + int retryCount = 0; do { + List childrenPaginated; long minCzxid = nextPage.getMinCzxid(); - // If a child with the same czxid is returned in the previous page, and then deleted - // on the server, the children not in the previous page but offset is less than czxidOffset - // would be missed in the next page. Use the last child in the previous page to determine whether or not - // the deletion case happens. If it happens, retry fetching the page starting from offset 0. - int czxidOffset = nextPage.getMinCzxidOffset() - 1; - childrenPaginated = getChildren(path, watcher, Integer.MAX_VALUE, minCzxid, czxidOffset, null, nextPage); - if (!childrenPaginated.isEmpty() && !childrenSet.contains(childrenPaginated.get(0))) { - childrenPaginated = getChildren(path, watcher, Integer.MAX_VALUE, minCzxid, 0, null, nextPage); + if (nextPage.getMinCzxidOffset() == 0) { + childrenPaginated = getChildren(path, watcher, Integer.MAX_VALUE, minCzxid, + nextPage.getMinCzxidOffset(), null, nextPage); + } else { + // If a child with the same czxid is returned in the previous page, and then deleted + // on the server, the children not in the previous page but offset is less than czxidOffset + // would be missed in the next page. Use the last child in the previous page to determine whether or not + // the deletion case happens. If it happens, retry fetching the page starting from offset 0. + childrenPaginated = getChildren(path, watcher, Integer.MAX_VALUE, minCzxid, + nextPage.getMinCzxidOffset() - 1, null, nextPage); + if (!childrenPaginated.isEmpty() && !childrenSet.contains(childrenPaginated.get(0))) { + if (retryCount++ >= 3) { + throw KeeperException.create(Code.OPERATIONTIMEOUT); + } + LOG.info("Retry fetching children for minZxid = {}, retries = {}", minCzxid, retryCount); + childrenPaginated = getChildren(path, watcher, Integer.MAX_VALUE, minCzxid, 0, null, nextPage); + } } childrenSet.addAll(childrenPaginated); } while (nextPage.getMinCzxid() != ZooDefs.GetChildrenPaginated.lastPageMinCzxid); - - return new ArrayList<>(childrenSet); } private void updateNextPage(PaginationNextPage nextPage, long minCzxId, int czxIdOffset) { diff --git a/zookeeper-server/src/main/java/org/apache/zookeeper/server/DataTree.java b/zookeeper-server/src/main/java/org/apache/zookeeper/server/DataTree.java index 7e53bdaebb6..959bc59bd70 100644 --- a/zookeeper-server/src/main/java/org/apache/zookeeper/server/DataTree.java +++ b/zookeeper-server/src/main/java/org/apache/zookeeper/server/DataTree.java @@ -878,18 +878,25 @@ public List getPaginatedChildren(String path, Stat stat, Watcher watcher updateReadStat(path, countReadChildrenBytes(allChildren)); if (outputNextPage != null) { outputNextPage.setMinCzxid(ZooDefs.GetChildrenPaginated.lastPageMinCzxid); + outputNextPage.setMinCzxidOffset(ZooDefs.GetChildrenPaginated.lastPageCzxidOffset); } return allChildren; } } + return getSortedPaginatedChildren(n, path, stat, watcher, maxReturned, minCzxId, czxIdOffset, outputNextPage); + } + + private List getSortedPaginatedChildren(DataNode node, String path, Stat stat, Watcher watcher, + int maxReturned, long minCzxId, int czxIdOffset, + PaginationNextPage outputNextPage) { int index = 0; List targetChildren = new ArrayList(); List paginatedChildren = new ArrayList(); // Need to lock the parent node for the whole block between reading children list and adding watch - synchronized (n) { - buildChildrenPathWithStat(n, path, stat, minCzxId, targetChildren); + synchronized (node) { + buildChildrenPathWithStat(node, path, stat, minCzxId, targetChildren); targetChildren.sort(staticNodeCreationComparator); @@ -981,22 +988,30 @@ private void updateNextPage(PaginationNextPage nextPage, List chil if (lastAddedIndex == children.size() - 1) { // All children are added, so this is the last page nextPage.setMinCzxid(ZooDefs.GetChildrenPaginated.lastPageMinCzxid); + nextPage.setMinCzxidOffset(ZooDefs.GetChildrenPaginated.lastPageCzxidOffset); return; } - // Find the minCzxidOffset next next page by searching the index (startIndex) of czxid - // that is not equal to current czxid. - // minCzxidOffset of next page = lastAddedIndex - startIndex long lastCzxid = children.get(lastAddedIndex).getStat().getCzxid(); - int startIndex = lastAddedIndex; - while (startIndex >= 0) { - if (children.get(startIndex).getStat().getCzxid() != lastCzxid) { - break; + long nextCzxid = children.get(lastAddedIndex + 1).getStat().getCzxid(); + int nextCzixdOffset = 0; + + if (nextCzxid == lastCzxid) { + // Find the minCzxidOffset next next page by searching the index (startIndex) of czxid + // that is not equal to current czxid. + // minCzxidOffset of next page = lastAddedIndex - startIndex + int startIndex = lastAddedIndex; + while (startIndex >= 0) { + if (children.get(startIndex).getStat().getCzxid() != lastCzxid) { + break; + } + startIndex--; } - startIndex--; + nextCzixdOffset = lastAddedIndex - startIndex; } - nextPage.setMinCzxid(lastCzxid); - nextPage.setMinCzxidOffset(lastAddedIndex - startIndex); + + nextPage.setMinCzxid(nextCzxid); + nextPage.setMinCzxidOffset(nextCzixdOffset); } public Stat setACL(String path, List acl, int version) throws KeeperException.NoNodeException { diff --git a/zookeeper-server/src/test/java/org/apache/zookeeper/server/DataTreeTest.java b/zookeeper-server/src/test/java/org/apache/zookeeper/server/DataTreeTest.java index 091eb91a200..ff59e5a4442 100644 --- a/zookeeper-server/src/test/java/org/apache/zookeeper/server/DataTreeTest.java +++ b/zookeeper-server/src/test/java/org/apache/zookeeper/server/DataTreeTest.java @@ -32,6 +32,7 @@ import java.io.PrintWriter; import java.io.StringWriter; import java.lang.reflect.Field; +import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.concurrent.CompletableFuture; @@ -44,6 +45,7 @@ import org.apache.jute.Record; import org.apache.zookeeper.KeeperException.NoNodeException; import org.apache.zookeeper.KeeperException.NodeExistsException; +import org.apache.zookeeper.PaginationNextPage; import org.apache.zookeeper.Quotas; import org.apache.zookeeper.WatchedEvent; import org.apache.zookeeper.Watcher; @@ -320,14 +322,19 @@ public void testGetChildrenPaginated() throws NodeExistsException, NoNodeExcepti dt.createNode(rootPath, new byte[0], null, 0, dt.getNode("/").stat.getCversion() + 1, 1, 1); // Create 10 child nodes + List childrenCreated = new ArrayList<>(countNodes); for (int i = 0; i < countNodes; ++i) { dt.createNode(rootPath + "/test-" + i, new byte[0], null, 0, dt.getNode(rootPath).stat.getCversion() + i + 1, firstCzxId + i, 1); + childrenCreated.add("test-" + i); } // Asking from a negative for 5 nodes should return the 5, and not set the watch int curWatchCount = dt.getWatchCount(); - List result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 5, -1, 0, null); + PaginationNextPage nextPage = new PaginationNextPage(); + List result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 5, -1, 0, nextPage); assertEquals(5, result.size()); + assertEquals(firstCzxId + 5, nextPage.getMinCzxid()); + assertEquals(0, nextPage.getMinCzxidOffset()); assertEquals("The watch not should have been set", curWatchCount, dt.getWatchCount()); // Verify that the list is sorted String before = ""; @@ -339,8 +346,10 @@ public void testGetChildrenPaginated() throws NodeExistsException, NoNodeExcepti // Asking from a negative would give me all children, and set the watch curWatchCount = dt.getWatchCount(); - result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), countNodes, -1, 0, null); + result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), countNodes, -1, 0, nextPage); assertEquals(countNodes, result.size()); + assertEquals(ZooDefs.GetChildrenPaginated.lastPageMinCzxid, nextPage.getMinCzxid()); + assertEquals(ZooDefs.GetChildrenPaginated.lastPageCzxidOffset, nextPage.getMinCzxidOffset()); assertEquals("The watch should have been set", curWatchCount + 1, dt.getWatchCount()); // Verify that the list is sorted before = ""; @@ -350,26 +359,43 @@ public void testGetChildrenPaginated() throws NodeExistsException, NoNodeExcepti before = path; } + // Asking with maxReturned = MAX_INT would give me all children with no sorting, and set the watch + curWatchCount = dt.getWatchCount(); + result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), Integer.MAX_VALUE, 0, 0, nextPage); + assertEquals(countNodes, result.size()); + assertEquals(ZooDefs.GetChildrenPaginated.lastPageMinCzxid, nextPage.getMinCzxid()); + assertEquals(ZooDefs.GetChildrenPaginated.lastPageCzxidOffset, nextPage.getMinCzxidOffset()); + assertEquals("The watch should have been set", curWatchCount + 1, dt.getWatchCount()); + // Verify that the list is not sorted + assertNotEquals("The returned list of children should not be sorted", childrenCreated, result); + // Asking from the last one should return only one node curWatchCount = dt.getWatchCount(); - result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 2, 1000 + countNodes - 1, 0, null); + result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 2, 1000 + countNodes - 1, 0, nextPage); assertEquals(1, result.size()); assertEquals("test-" + (countNodes - 1), result.get(0)); + assertEquals(ZooDefs.GetChildrenPaginated.lastPageMinCzxid, nextPage.getMinCzxid()); + assertEquals(ZooDefs.GetChildrenPaginated.lastPageCzxidOffset, nextPage.getMinCzxidOffset()); assertEquals("The watch should have been set", curWatchCount + 1, dt.getWatchCount()); // Asking from the last created node+1 should return an empty list and set the watch curWatchCount = dt.getWatchCount(); - result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 2, 1000 + countNodes, 0, null); + result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 2, 1000 + countNodes, 0, nextPage); assertTrue("The result should be an empty list", result.isEmpty()); + assertEquals(ZooDefs.GetChildrenPaginated.lastPageMinCzxid, nextPage.getMinCzxid()); + assertEquals(ZooDefs.GetChildrenPaginated.lastPageCzxidOffset, nextPage.getMinCzxidOffset()); assertEquals("The watch should have been set", curWatchCount + 1, dt.getWatchCount()); // Asking from -1 for one node should return two, and NOT set the watch curWatchCount = dt.getWatchCount(); - result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 1, -1, 0, null); + result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 1, -1, 0, nextPage); assertEquals("No watch should be set", curWatchCount, dt.getWatchCount()); assertEquals("We only return up to ", 1, result.size()); // Check that we ordered correctly assertEquals("test-0", result.get(0)); + // Check next page info is returned correctly + assertEquals(firstCzxId + 1, nextPage.getMinCzxid()); + assertEquals(0, nextPage.getMinCzxidOffset()); } @Test(timeout = 60000) @@ -398,8 +424,11 @@ public void testGetChildrenPaginatedWithOffset() throws NodeExistsException, NoN // Asking from a negative would give me all children, and set the watch int curWatchCount = dt.getWatchCount(); - List result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 1000, -1, 0, null); + PaginationNextPage nextPage = new PaginationNextPage(); + List result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 1000, -1, 0, nextPage); assertEquals(allNodes, result.size()); + assertEquals(ZooDefs.GetChildrenPaginated.lastPageMinCzxid, nextPage.getMinCzxid()); + assertEquals(ZooDefs.GetChildrenPaginated.lastPageCzxidOffset, nextPage.getMinCzxidOffset()); assertEquals("The watch should have been set", curWatchCount + 1, dt.getWatchCount()); // Verify that the list is sorted String before = ""; @@ -409,48 +438,70 @@ public void testGetChildrenPaginatedWithOffset() throws NodeExistsException, NoN before = path; } + // Asking with minCzxid = 0, offset = 0 should not skip anything. + // maxReturned = 1, the returned nextPage should be: minCzxid = next czxid, offset = 0. + curWatchCount = dt.getWatchCount(); + result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 1, 0, 0, nextPage); + assertEquals(1, result.size()); + assertEquals("test-0", result.get(0)); + assertEquals(childrenCzxId, nextPage.getMinCzxid()); + assertEquals(0, nextPage.getMinCzxidOffset()); + assertEquals("The watch should not have been set", curWatchCount, dt.getWatchCount()); + // Asking with offset minCzxId below childrenCzxId should not skip anything, regardless of offset curWatchCount = dt.getWatchCount(); - result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 2, childrenCzxId - 1, 3, null); + result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 2, childrenCzxId - 1, 3, nextPage); assertEquals(2, result.size()); assertEquals("test-1", result.get(0)); assertEquals("test-2", result.get(1)); + assertEquals(childrenCzxId, nextPage.getMinCzxid()); + assertEquals(2, nextPage.getMinCzxidOffset()); assertEquals("The watch should not have been set", curWatchCount, dt.getWatchCount()); // Asking with offset 5 should skip nodes 1, 2, 3, 4, 5 curWatchCount = dt.getWatchCount(); - result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 2, childrenCzxId, 5, null); + result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 2, childrenCzxId, 5, nextPage); assertEquals(2, result.size()); assertEquals("test-6", result.get(0)); assertEquals("test-7", result.get(1)); + assertEquals(childrenCzxId, nextPage.getMinCzxid()); + assertEquals(7, nextPage.getMinCzxidOffset()); assertEquals("The watch should not have been set", curWatchCount, dt.getWatchCount()); // Asking with offset 5 for more nodes than are there should skip nodes 1, 2, 3, 4, 5 (plus 0 due to zxid) curWatchCount = dt.getWatchCount(); - result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 10, childrenCzxId, 5, null); + result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 10, childrenCzxId, 5, nextPage); assertEquals(5, result.size()); assertEquals("test-6", result.get(0)); assertEquals("test-7", result.get(1)); assertEquals("test-8", result.get(2)); assertEquals("test-9", result.get(3)); + assertEquals("test-999", result.get(4)); + assertEquals(ZooDefs.GetChildrenPaginated.lastPageMinCzxid, nextPage.getMinCzxid()); + assertEquals(ZooDefs.GetChildrenPaginated.lastPageCzxidOffset, nextPage.getMinCzxidOffset()); assertEquals("The watch should have been set", curWatchCount + 1, dt.getWatchCount()); // Asking with offset 5 for fewer nodes than are there should skip nodes 1, 2, 3, 4, 5 (plus 0 due to zxid) + // Returned next page should be: minCzxid = next czxid, offset = 0 curWatchCount = dt.getWatchCount(); - result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 4, childrenCzxId, 5, null); + result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 4, childrenCzxId, 5, nextPage); assertEquals(4, result.size()); assertEquals("test-6", result.get(0)); assertEquals("test-7", result.get(1)); assertEquals("test-8", result.get(2)); assertEquals("test-9", result.get(3)); + assertEquals(childrenCzxId + 100, nextPage.getMinCzxid()); + assertEquals(0, nextPage.getMinCzxidOffset()); assertEquals("The watch should not have been set", curWatchCount, dt.getWatchCount()); // Asking from the last created node+1 should return an empty list and set the watch curWatchCount = dt.getWatchCount(); - result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 2, 1000 + childrenCzxId, 0, null); + result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 2, 1000 + childrenCzxId, 0, nextPage); assertTrue("The result should be an empty list", result.isEmpty()); + assertEquals(ZooDefs.GetChildrenPaginated.lastPageMinCzxid, nextPage.getMinCzxid()); + assertEquals(ZooDefs.GetChildrenPaginated.lastPageCzxidOffset, nextPage.getMinCzxidOffset()); assertEquals("The watch should have been set", curWatchCount + 1, dt.getWatchCount()); } @@ -469,7 +520,7 @@ public void testGetChildrenPaginatedEmpty() throws NodeExistsException, NoNodeEx assertTrue("The result should be empty", result.isEmpty()); assertEquals("The watch should have been set", curWatchCount + 1, dt.getWatchCount()); - // Specify max int to fetch all children - trying to return all all without sorting, and set the watch + // Specify max int to fetch all children - trying to return all without sorting, and set the watch curWatchCount = dt.getWatchCount(); result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), Integer.MAX_VALUE, 0, 0, null); assertTrue("The result should be empty", result.isEmpty());