From 898d08aea98e87804d16018d938f1422c96db110 Mon Sep 17 00:00:00 2001 From: Huizhi Lu Date: Thu, 3 Dec 2020 21:32:19 -0800 Subject: [PATCH] NextPage returns the exact minCzxid and offset --- .../java/org/apache/zookeeper/ZooDefs.java | 1 + .../java/org/apache/zookeeper/ZooKeeper.java | 49 ++++++--- .../org/apache/zookeeper/server/DataTree.java | 39 ++++--- .../apache/zookeeper/server/DataTreeTest.java | 102 +++++++++++++++--- 4 files changed, 152 insertions(+), 39 deletions(-) diff --git a/zookeeper-server/src/main/java/org/apache/zookeeper/ZooDefs.java b/zookeeper-server/src/main/java/org/apache/zookeeper/ZooDefs.java index b2a2388006c..df927f48b41 100644 --- a/zookeeper-server/src/main/java/org/apache/zookeeper/ZooDefs.java +++ b/zookeeper-server/src/main/java/org/apache/zookeeper/ZooDefs.java @@ -164,6 +164,7 @@ public interface AddWatchModes { @InterfaceAudience.Public public interface GetChildrenPaginated { long lastPageMinCzxid = -1L; + int lastPageCzxidOffset = -1; } public static final String[] opNames = {"notification", "create", "delete", "exists", "getData", "setData", "getACL", "setACL", "getChildren", "getChildren2", "getMaxChildren", "setMaxChildren", "ping", "reconfig", "getConfig"}; diff --git a/zookeeper-server/src/main/java/org/apache/zookeeper/ZooKeeper.java b/zookeeper-server/src/main/java/org/apache/zookeeper/ZooKeeper.java index 562274888f7..1f937dc4221 100644 --- a/zookeeper-server/src/main/java/org/apache/zookeeper/ZooKeeper.java +++ b/zookeeper-server/src/main/java/org/apache/zookeeper/ZooKeeper.java @@ -2781,9 +2781,14 @@ public List getChildren(final String path, Watcher watcher) throws Keepe * @param outputNextPage * - if not null, {@link PaginationNextPage} info returned from server will be copied to * {@code outputNextPage}. The info can be used for fetching the next page of remaining children, - * or checking whether the returned page is the last page. + * or checking whether the returned page is the last page by comparing + * {@link PaginationNextPage#getMinCzxid()} with + * {@link org.apache.zookeeper.ZooDefs.GetChildrenPaginated#lastPageMinCzxid} * @return - * a list of children nodes, up to {@code maxReturned} + * a list of children nodes, up to {@code maxReturned}. + *

When params are set {@code maxReturned} = {@code Integer.MAX_VALUE}, {@code minCzxid} <= 0: + * the children list is unordered if all the children can be returned in a page; + *

Otherwise, the children list is ordered by czxid. * @throws KeeperException * if the server signals an error with a non-zero error code. * @throws IllegalArgumentException @@ -2860,7 +2865,7 @@ public List getChildren(final String path, * * @param path the path of the parent node * @param watch whether or not leave a watch on the given node - * @return a list of all children of the given path + * @return a unordered list of all children of the given path * @throws KeeperException if the server signals an error with a non-zero error code. * @throws InterruptedException if the server transaction is interrupted. */ @@ -2874,21 +2879,39 @@ public List getAllChildrenPaginated(String path, boolean watch) } Set childrenSet = new HashSet<>(childrenPaginated); + getChildrenRemainingPages(path, watcher, nextPage, childrenSet); + + return new ArrayList<>(childrenSet); + } + + private void getChildrenRemainingPages(String path, + Watcher watcher, + PaginationNextPage nextPage, + Set childrenSet) throws KeeperException, InterruptedException { + int retryCount = 0; do { + List childrenPaginated; long minCzxid = nextPage.getMinCzxid(); - // If a child with the same czxid is returned in the previous page, and then deleted - // on the server, the children not in the previous page but offset is less than czxidOffset - // would be missed in the next page. Use the last child in the previous page to determine whether or not - // the deletion case happens. If it happens, retry fetching the page starting from offset 0. - int czxidOffset = nextPage.getMinCzxidOffset() - 1; - childrenPaginated = getChildren(path, watcher, Integer.MAX_VALUE, minCzxid, czxidOffset, null, nextPage); - if (!childrenPaginated.isEmpty() && !childrenSet.contains(childrenPaginated.get(0))) { - childrenPaginated = getChildren(path, watcher, Integer.MAX_VALUE, minCzxid, 0, null, nextPage); + if (nextPage.getMinCzxidOffset() == 0) { + childrenPaginated = getChildren(path, watcher, Integer.MAX_VALUE, minCzxid, + nextPage.getMinCzxidOffset(), null, nextPage); + } else { + // If a child with the same czxid is returned in the previous page, and then deleted + // on the server, the children not in the previous page but offset is less than czxidOffset + // would be missed in the next page. Use the last child in the previous page to determine whether or not + // the deletion case happens. If it happens, retry fetching the page starting from offset 0. + childrenPaginated = getChildren(path, watcher, Integer.MAX_VALUE, minCzxid, + nextPage.getMinCzxidOffset() - 1, null, nextPage); + if (!childrenPaginated.isEmpty() && !childrenSet.contains(childrenPaginated.get(0))) { + if (retryCount++ >= 3) { + throw KeeperException.create(Code.OPERATIONTIMEOUT); + } + LOG.info("Retry fetching children for minZxid = {}, retries = {}", minCzxid, retryCount); + childrenPaginated = getChildren(path, watcher, Integer.MAX_VALUE, minCzxid, 0, null, nextPage); + } } childrenSet.addAll(childrenPaginated); } while (nextPage.getMinCzxid() != ZooDefs.GetChildrenPaginated.lastPageMinCzxid); - - return new ArrayList<>(childrenSet); } private void updateNextPage(PaginationNextPage nextPage, long minCzxId, int czxIdOffset) { diff --git a/zookeeper-server/src/main/java/org/apache/zookeeper/server/DataTree.java b/zookeeper-server/src/main/java/org/apache/zookeeper/server/DataTree.java index 7e53bdaebb6..959bc59bd70 100644 --- a/zookeeper-server/src/main/java/org/apache/zookeeper/server/DataTree.java +++ b/zookeeper-server/src/main/java/org/apache/zookeeper/server/DataTree.java @@ -878,18 +878,25 @@ public List getPaginatedChildren(String path, Stat stat, Watcher watcher updateReadStat(path, countReadChildrenBytes(allChildren)); if (outputNextPage != null) { outputNextPage.setMinCzxid(ZooDefs.GetChildrenPaginated.lastPageMinCzxid); + outputNextPage.setMinCzxidOffset(ZooDefs.GetChildrenPaginated.lastPageCzxidOffset); } return allChildren; } } + return getSortedPaginatedChildren(n, path, stat, watcher, maxReturned, minCzxId, czxIdOffset, outputNextPage); + } + + private List getSortedPaginatedChildren(DataNode node, String path, Stat stat, Watcher watcher, + int maxReturned, long minCzxId, int czxIdOffset, + PaginationNextPage outputNextPage) { int index = 0; List targetChildren = new ArrayList(); List paginatedChildren = new ArrayList(); // Need to lock the parent node for the whole block between reading children list and adding watch - synchronized (n) { - buildChildrenPathWithStat(n, path, stat, minCzxId, targetChildren); + synchronized (node) { + buildChildrenPathWithStat(node, path, stat, minCzxId, targetChildren); targetChildren.sort(staticNodeCreationComparator); @@ -981,22 +988,30 @@ private void updateNextPage(PaginationNextPage nextPage, List chil if (lastAddedIndex == children.size() - 1) { // All children are added, so this is the last page nextPage.setMinCzxid(ZooDefs.GetChildrenPaginated.lastPageMinCzxid); + nextPage.setMinCzxidOffset(ZooDefs.GetChildrenPaginated.lastPageCzxidOffset); return; } - // Find the minCzxidOffset next next page by searching the index (startIndex) of czxid - // that is not equal to current czxid. - // minCzxidOffset of next page = lastAddedIndex - startIndex long lastCzxid = children.get(lastAddedIndex).getStat().getCzxid(); - int startIndex = lastAddedIndex; - while (startIndex >= 0) { - if (children.get(startIndex).getStat().getCzxid() != lastCzxid) { - break; + long nextCzxid = children.get(lastAddedIndex + 1).getStat().getCzxid(); + int nextCzixdOffset = 0; + + if (nextCzxid == lastCzxid) { + // Find the minCzxidOffset next next page by searching the index (startIndex) of czxid + // that is not equal to current czxid. + // minCzxidOffset of next page = lastAddedIndex - startIndex + int startIndex = lastAddedIndex; + while (startIndex >= 0) { + if (children.get(startIndex).getStat().getCzxid() != lastCzxid) { + break; + } + startIndex--; } - startIndex--; + nextCzixdOffset = lastAddedIndex - startIndex; } - nextPage.setMinCzxid(lastCzxid); - nextPage.setMinCzxidOffset(lastAddedIndex - startIndex); + + nextPage.setMinCzxid(nextCzxid); + nextPage.setMinCzxidOffset(nextCzixdOffset); } public Stat setACL(String path, List acl, int version) throws KeeperException.NoNodeException { diff --git a/zookeeper-server/src/test/java/org/apache/zookeeper/server/DataTreeTest.java b/zookeeper-server/src/test/java/org/apache/zookeeper/server/DataTreeTest.java index 091eb91a200..820b6140c6f 100644 --- a/zookeeper-server/src/test/java/org/apache/zookeeper/server/DataTreeTest.java +++ b/zookeeper-server/src/test/java/org/apache/zookeeper/server/DataTreeTest.java @@ -32,6 +32,8 @@ import java.io.PrintWriter; import java.io.StringWriter; import java.lang.reflect.Field; +import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import java.util.Map; import java.util.concurrent.CompletableFuture; @@ -44,6 +46,7 @@ import org.apache.jute.Record; import org.apache.zookeeper.KeeperException.NoNodeException; import org.apache.zookeeper.KeeperException.NodeExistsException; +import org.apache.zookeeper.PaginationNextPage; import org.apache.zookeeper.Quotas; import org.apache.zookeeper.WatchedEvent; import org.apache.zookeeper.Watcher; @@ -320,14 +323,19 @@ public void testGetChildrenPaginated() throws NodeExistsException, NoNodeExcepti dt.createNode(rootPath, new byte[0], null, 0, dt.getNode("/").stat.getCversion() + 1, 1, 1); // Create 10 child nodes + List childrenCreated = new ArrayList<>(countNodes); for (int i = 0; i < countNodes; ++i) { dt.createNode(rootPath + "/test-" + i, new byte[0], null, 0, dt.getNode(rootPath).stat.getCversion() + i + 1, firstCzxId + i, 1); + childrenCreated.add("test-" + i); } // Asking from a negative for 5 nodes should return the 5, and not set the watch int curWatchCount = dt.getWatchCount(); - List result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 5, -1, 0, null); + PaginationNextPage nextPage = new PaginationNextPage(); + List result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 5, -1, 0, nextPage); assertEquals(5, result.size()); + assertEquals(firstCzxId + 5, nextPage.getMinCzxid()); + assertEquals(0, nextPage.getMinCzxidOffset()); assertEquals("The watch not should have been set", curWatchCount, dt.getWatchCount()); // Verify that the list is sorted String before = ""; @@ -339,8 +347,10 @@ public void testGetChildrenPaginated() throws NodeExistsException, NoNodeExcepti // Asking from a negative would give me all children, and set the watch curWatchCount = dt.getWatchCount(); - result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), countNodes, -1, 0, null); + result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), countNodes, -1, 0, nextPage); assertEquals(countNodes, result.size()); + assertEquals(ZooDefs.GetChildrenPaginated.lastPageMinCzxid, nextPage.getMinCzxid()); + assertEquals(ZooDefs.GetChildrenPaginated.lastPageCzxidOffset, nextPage.getMinCzxidOffset()); assertEquals("The watch should have been set", curWatchCount + 1, dt.getWatchCount()); // Verify that the list is sorted before = ""; @@ -350,26 +360,60 @@ public void testGetChildrenPaginated() throws NodeExistsException, NoNodeExcepti before = path; } + // Asking with maxReturned = MAX_INT would give me all children with no sorting, and set the watch + curWatchCount = dt.getWatchCount(); + result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), Integer.MAX_VALUE, 0, 0, nextPage); + assertEquals(countNodes, result.size()); + assertEquals(ZooDefs.GetChildrenPaginated.lastPageMinCzxid, nextPage.getMinCzxid()); + assertEquals(ZooDefs.GetChildrenPaginated.lastPageCzxidOffset, nextPage.getMinCzxidOffset()); + assertEquals("The watch should have been set", curWatchCount + 1, dt.getWatchCount()); + // Verify that the list is not sorted + assertNotEquals("The returned list of children should not be sorted", childrenCreated, result); + + // Passing a null watch when fetching all children should not set the watch + curWatchCount = dt.getWatchCount(); + result = dt.getPaginatedChildren(rootPath, null, null, Integer.MAX_VALUE, 0, 0, nextPage); + assertEquals(countNodes, result.size()); + assertEquals(ZooDefs.GetChildrenPaginated.lastPageMinCzxid, nextPage.getMinCzxid()); + assertEquals(ZooDefs.GetChildrenPaginated.lastPageCzxidOffset, nextPage.getMinCzxidOffset()); + assertEquals("The watch should have been set", curWatchCount, dt.getWatchCount()); + + // Passing a null watch when fetching all children should not set the watch + curWatchCount = dt.getWatchCount(); + result = dt.getPaginatedChildren(rootPath, null, null, countNodes, 0, 0, nextPage); + // Returned result is ordered + assertEquals(childrenCreated, result); + assertEquals(ZooDefs.GetChildrenPaginated.lastPageMinCzxid, nextPage.getMinCzxid()); + assertEquals(ZooDefs.GetChildrenPaginated.lastPageCzxidOffset, nextPage.getMinCzxidOffset()); + assertEquals("The watch should have been set", curWatchCount, dt.getWatchCount()); + // Asking from the last one should return only one node curWatchCount = dt.getWatchCount(); - result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 2, 1000 + countNodes - 1, 0, null); + result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 2, 1000 + countNodes - 1, 0, nextPage); assertEquals(1, result.size()); assertEquals("test-" + (countNodes - 1), result.get(0)); + assertEquals(ZooDefs.GetChildrenPaginated.lastPageMinCzxid, nextPage.getMinCzxid()); + assertEquals(ZooDefs.GetChildrenPaginated.lastPageCzxidOffset, nextPage.getMinCzxidOffset()); assertEquals("The watch should have been set", curWatchCount + 1, dt.getWatchCount()); // Asking from the last created node+1 should return an empty list and set the watch curWatchCount = dt.getWatchCount(); - result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 2, 1000 + countNodes, 0, null); + result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 2, 1000 + countNodes, 0, nextPage); assertTrue("The result should be an empty list", result.isEmpty()); + assertEquals(ZooDefs.GetChildrenPaginated.lastPageMinCzxid, nextPage.getMinCzxid()); + assertEquals(ZooDefs.GetChildrenPaginated.lastPageCzxidOffset, nextPage.getMinCzxidOffset()); assertEquals("The watch should have been set", curWatchCount + 1, dt.getWatchCount()); // Asking from -1 for one node should return two, and NOT set the watch curWatchCount = dt.getWatchCount(); - result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 1, -1, 0, null); + result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 1, -1, 0, nextPage); assertEquals("No watch should be set", curWatchCount, dt.getWatchCount()); assertEquals("We only return up to ", 1, result.size()); // Check that we ordered correctly assertEquals("test-0", result.get(0)); + // Check next page info is returned correctly + assertEquals(firstCzxId + 1, nextPage.getMinCzxid()); + assertEquals(0, nextPage.getMinCzxidOffset()); } @Test(timeout = 60000) @@ -398,8 +442,11 @@ public void testGetChildrenPaginatedWithOffset() throws NodeExistsException, NoN // Asking from a negative would give me all children, and set the watch int curWatchCount = dt.getWatchCount(); - List result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 1000, -1, 0, null); + PaginationNextPage nextPage = new PaginationNextPage(); + List result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 1000, -1, 0, nextPage); assertEquals(allNodes, result.size()); + assertEquals(ZooDefs.GetChildrenPaginated.lastPageMinCzxid, nextPage.getMinCzxid()); + assertEquals(ZooDefs.GetChildrenPaginated.lastPageCzxidOffset, nextPage.getMinCzxidOffset()); assertEquals("The watch should have been set", curWatchCount + 1, dt.getWatchCount()); // Verify that the list is sorted String before = ""; @@ -409,48 +456,70 @@ public void testGetChildrenPaginatedWithOffset() throws NodeExistsException, NoN before = path; } + // Asking with minCzxid = 0, offset = 0 should not skip anything. + // maxReturned = 1, the returned nextPage should be: minCzxid = next czxid, offset = 0. + curWatchCount = dt.getWatchCount(); + result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 1, 0, 0, nextPage); + assertEquals(1, result.size()); + assertEquals("test-0", result.get(0)); + assertEquals(childrenCzxId, nextPage.getMinCzxid()); + assertEquals(0, nextPage.getMinCzxidOffset()); + assertEquals("The watch should not have been set", curWatchCount, dt.getWatchCount()); + // Asking with offset minCzxId below childrenCzxId should not skip anything, regardless of offset curWatchCount = dt.getWatchCount(); - result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 2, childrenCzxId - 1, 3, null); + result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 2, childrenCzxId - 1, 3, nextPage); assertEquals(2, result.size()); assertEquals("test-1", result.get(0)); assertEquals("test-2", result.get(1)); + assertEquals(childrenCzxId, nextPage.getMinCzxid()); + assertEquals(2, nextPage.getMinCzxidOffset()); assertEquals("The watch should not have been set", curWatchCount, dt.getWatchCount()); // Asking with offset 5 should skip nodes 1, 2, 3, 4, 5 curWatchCount = dt.getWatchCount(); - result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 2, childrenCzxId, 5, null); + result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 2, childrenCzxId, 5, nextPage); assertEquals(2, result.size()); assertEquals("test-6", result.get(0)); assertEquals("test-7", result.get(1)); + assertEquals(childrenCzxId, nextPage.getMinCzxid()); + assertEquals(7, nextPage.getMinCzxidOffset()); assertEquals("The watch should not have been set", curWatchCount, dt.getWatchCount()); // Asking with offset 5 for more nodes than are there should skip nodes 1, 2, 3, 4, 5 (plus 0 due to zxid) curWatchCount = dt.getWatchCount(); - result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 10, childrenCzxId, 5, null); + result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 10, childrenCzxId, 5, nextPage); assertEquals(5, result.size()); assertEquals("test-6", result.get(0)); assertEquals("test-7", result.get(1)); assertEquals("test-8", result.get(2)); assertEquals("test-9", result.get(3)); + assertEquals("test-999", result.get(4)); + assertEquals(ZooDefs.GetChildrenPaginated.lastPageMinCzxid, nextPage.getMinCzxid()); + assertEquals(ZooDefs.GetChildrenPaginated.lastPageCzxidOffset, nextPage.getMinCzxidOffset()); assertEquals("The watch should have been set", curWatchCount + 1, dt.getWatchCount()); // Asking with offset 5 for fewer nodes than are there should skip nodes 1, 2, 3, 4, 5 (plus 0 due to zxid) + // Returned next page should be: minCzxid = next czxid, offset = 0 curWatchCount = dt.getWatchCount(); - result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 4, childrenCzxId, 5, null); + result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 4, childrenCzxId, 5, nextPage); assertEquals(4, result.size()); assertEquals("test-6", result.get(0)); assertEquals("test-7", result.get(1)); assertEquals("test-8", result.get(2)); assertEquals("test-9", result.get(3)); + assertEquals(childrenCzxId + 100, nextPage.getMinCzxid()); + assertEquals(0, nextPage.getMinCzxidOffset()); assertEquals("The watch should not have been set", curWatchCount, dt.getWatchCount()); // Asking from the last created node+1 should return an empty list and set the watch curWatchCount = dt.getWatchCount(); - result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 2, 1000 + childrenCzxId, 0, null); + result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 2, 1000 + childrenCzxId, 0, nextPage); assertTrue("The result should be an empty list", result.isEmpty()); + assertEquals(ZooDefs.GetChildrenPaginated.lastPageMinCzxid, nextPage.getMinCzxid()); + assertEquals(ZooDefs.GetChildrenPaginated.lastPageCzxidOffset, nextPage.getMinCzxidOffset()); assertEquals("The watch should have been set", curWatchCount + 1, dt.getWatchCount()); } @@ -465,14 +534,19 @@ public void testGetChildrenPaginatedEmpty() throws NodeExistsException, NoNodeEx // Asking from a negative would give me all children, and set the watch // This goes to the pagination branch. int curWatchCount = dt.getWatchCount(); - List result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 100, -1, 0, null); + PaginationNextPage nextPage = new PaginationNextPage(); + List result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 100, -1, 0, nextPage); assertTrue("The result should be empty", result.isEmpty()); + assertEquals(ZooDefs.GetChildrenPaginated.lastPageMinCzxid, nextPage.getMinCzxid()); + assertEquals(ZooDefs.GetChildrenPaginated.lastPageCzxidOffset, nextPage.getMinCzxidOffset()); assertEquals("The watch should have been set", curWatchCount + 1, dt.getWatchCount()); - // Specify max int to fetch all children - trying to return all all without sorting, and set the watch + // Specify max int to fetch all children - trying to return all without sorting, and set the watch curWatchCount = dt.getWatchCount(); - result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), Integer.MAX_VALUE, 0, 0, null); + result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), Integer.MAX_VALUE, 0, 0, nextPage); assertTrue("The result should be empty", result.isEmpty()); + assertEquals(ZooDefs.GetChildrenPaginated.lastPageMinCzxid, nextPage.getMinCzxid()); + assertEquals(ZooDefs.GetChildrenPaginated.lastPageCzxidOffset, nextPage.getMinCzxidOffset()); assertEquals("The watch should have been set", curWatchCount + 1, dt.getWatchCount()); }