Skip to content

Commit

Permalink
NextPage returns the exact minCzxid and offset
Browse files Browse the repository at this point in the history
  • Loading branch information
huizhilu committed Dec 4, 2020
1 parent c400bc1 commit dce1ae9
Show file tree
Hide file tree
Showing 4 changed files with 119 additions and 34 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,7 @@ public interface AddWatchModes {
@InterfaceAudience.Public
public interface GetChildrenPaginated {
long lastPageMinCzxid = -1L;
int lastPageCzxidOffset = -1;
}

public static final String[] opNames = {"notification", "create", "delete", "exists", "getData", "setData", "getACL", "setACL", "getChildren", "getChildren2", "getMaxChildren", "setMaxChildren", "ping", "reconfig", "getConfig"};
Expand Down
38 changes: 28 additions & 10 deletions zookeeper-server/src/main/java/org/apache/zookeeper/ZooKeeper.java
Original file line number Diff line number Diff line change
Expand Up @@ -2874,21 +2874,39 @@ public List<String> getAllChildrenPaginated(String path, boolean watch)
}

Set<String> childrenSet = new HashSet<>(childrenPaginated);
getChildrenRemainingPages(path, watcher, nextPage, childrenSet);

return new ArrayList<>(childrenSet);
}

private void getChildrenRemainingPages(String path,
Watcher watcher,
PaginationNextPage nextPage,
Set<String> childrenSet) throws KeeperException, InterruptedException {
int retryCount = 0;
do {
List<String> childrenPaginated;
long minCzxid = nextPage.getMinCzxid();
// If a child with the same czxid is returned in the previous page, and then deleted
// on the server, the children not in the previous page but offset is less than czxidOffset
// would be missed in the next page. Use the last child in the previous page to determine whether or not
// the deletion case happens. If it happens, retry fetching the page starting from offset 0.
int czxidOffset = nextPage.getMinCzxidOffset() - 1;
childrenPaginated = getChildren(path, watcher, Integer.MAX_VALUE, minCzxid, czxidOffset, null, nextPage);
if (!childrenPaginated.isEmpty() && !childrenSet.contains(childrenPaginated.get(0))) {
childrenPaginated = getChildren(path, watcher, Integer.MAX_VALUE, minCzxid, 0, null, nextPage);
if (nextPage.getMinCzxidOffset() == 0) {
childrenPaginated = getChildren(path, watcher, Integer.MAX_VALUE, minCzxid,
nextPage.getMinCzxidOffset(), null, nextPage);
} else {
// If a child with the same czxid is returned in the previous page, and then deleted
// on the server, the children not in the previous page but offset is less than czxidOffset
// would be missed in the next page. Use the last child in the previous page to determine whether or not
// the deletion case happens. If it happens, retry fetching the page starting from offset 0.
childrenPaginated = getChildren(path, watcher, Integer.MAX_VALUE, minCzxid,
nextPage.getMinCzxidOffset() - 1, null, nextPage);
if (!childrenPaginated.isEmpty() && !childrenSet.contains(childrenPaginated.get(0))) {
if (retryCount++ >= 3) {
throw KeeperException.create(Code.OPERATIONTIMEOUT);
}
LOG.info("Retry fetching children for minZxid = {}, retries = {}", minCzxid, retryCount);
childrenPaginated = getChildren(path, watcher, Integer.MAX_VALUE, minCzxid, 0, null, nextPage);
}
}
childrenSet.addAll(childrenPaginated);
} while (nextPage.getMinCzxid() != ZooDefs.GetChildrenPaginated.lastPageMinCzxid);

return new ArrayList<>(childrenSet);
}

private void updateNextPage(PaginationNextPage nextPage, long minCzxId, int czxIdOffset) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -878,18 +878,25 @@ public List<String> getPaginatedChildren(String path, Stat stat, Watcher watcher
updateReadStat(path, countReadChildrenBytes(allChildren));
if (outputNextPage != null) {
outputNextPage.setMinCzxid(ZooDefs.GetChildrenPaginated.lastPageMinCzxid);
outputNextPage.setMinCzxidOffset(ZooDefs.GetChildrenPaginated.lastPageCzxidOffset);
}
return allChildren;
}
}

return getSortedPaginatedChildren(n, path, stat, watcher, maxReturned, minCzxId, czxIdOffset, outputNextPage);
}

private List<String> getSortedPaginatedChildren(DataNode node, String path, Stat stat, Watcher watcher,
int maxReturned, long minCzxId, int czxIdOffset,
PaginationNextPage outputNextPage) {
int index = 0;
List<PathWithStat> targetChildren = new ArrayList<PathWithStat>();
List<String> paginatedChildren = new ArrayList<String>();

// Need to lock the parent node for the whole block between reading children list and adding watch
synchronized (n) {
buildChildrenPathWithStat(n, path, stat, minCzxId, targetChildren);
synchronized (node) {
buildChildrenPathWithStat(node, path, stat, minCzxId, targetChildren);

targetChildren.sort(staticNodeCreationComparator);

Expand Down Expand Up @@ -981,22 +988,30 @@ private void updateNextPage(PaginationNextPage nextPage, List<PathWithStat> chil
if (lastAddedIndex == children.size() - 1) {
// All children are added, so this is the last page
nextPage.setMinCzxid(ZooDefs.GetChildrenPaginated.lastPageMinCzxid);
nextPage.setMinCzxidOffset(ZooDefs.GetChildrenPaginated.lastPageCzxidOffset);
return;
}

// Find the minCzxidOffset next next page by searching the index (startIndex) of czxid
// that is not equal to current czxid.
// minCzxidOffset of next page = lastAddedIndex - startIndex
long lastCzxid = children.get(lastAddedIndex).getStat().getCzxid();
int startIndex = lastAddedIndex;
while (startIndex >= 0) {
if (children.get(startIndex).getStat().getCzxid() != lastCzxid) {
break;
long nextCzxid = children.get(lastAddedIndex + 1).getStat().getCzxid();
int nextCzixdOffset = 0;

if (nextCzxid == lastCzxid) {
// Find the minCzxidOffset next next page by searching the index (startIndex) of czxid
// that is not equal to current czxid.
// minCzxidOffset of next page = lastAddedIndex - startIndex
int startIndex = lastAddedIndex;
while (startIndex >= 0) {
if (children.get(startIndex).getStat().getCzxid() != lastCzxid) {
break;
}
startIndex--;
}
startIndex--;
nextCzixdOffset = lastAddedIndex - startIndex;
}
nextPage.setMinCzxid(lastCzxid);
nextPage.setMinCzxidOffset(lastAddedIndex - startIndex);

nextPage.setMinCzxid(nextCzxid);
nextPage.setMinCzxidOffset(nextCzixdOffset);
}

public Stat setACL(String path, List<ACL> acl, int version) throws KeeperException.NoNodeException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import java.io.PrintWriter;
import java.io.StringWriter;
import java.lang.reflect.Field;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CompletableFuture;
Expand All @@ -44,6 +45,7 @@
import org.apache.jute.Record;
import org.apache.zookeeper.KeeperException.NoNodeException;
import org.apache.zookeeper.KeeperException.NodeExistsException;
import org.apache.zookeeper.PaginationNextPage;
import org.apache.zookeeper.Quotas;
import org.apache.zookeeper.WatchedEvent;
import org.apache.zookeeper.Watcher;
Expand Down Expand Up @@ -320,14 +322,19 @@ public void testGetChildrenPaginated() throws NodeExistsException, NoNodeExcepti
dt.createNode(rootPath, new byte[0], null, 0, dt.getNode("/").stat.getCversion() + 1, 1, 1);

// Create 10 child nodes
List<String> childrenCreated = new ArrayList<>(countNodes);
for (int i = 0; i < countNodes; ++i) {
dt.createNode(rootPath + "/test-" + i, new byte[0], null, 0, dt.getNode(rootPath).stat.getCversion() + i + 1, firstCzxId + i, 1);
childrenCreated.add("test-" + i);
}

// Asking from a negative for 5 nodes should return the 5, and not set the watch
int curWatchCount = dt.getWatchCount();
List<String> result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 5, -1, 0, null);
PaginationNextPage nextPage = new PaginationNextPage();
List<String> result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 5, -1, 0, nextPage);
assertEquals(5, result.size());
assertEquals(firstCzxId + 5, nextPage.getMinCzxid());
assertEquals(0, nextPage.getMinCzxidOffset());
assertEquals("The watch not should have been set", curWatchCount, dt.getWatchCount());
// Verify that the list is sorted
String before = "";
Expand All @@ -339,8 +346,10 @@ public void testGetChildrenPaginated() throws NodeExistsException, NoNodeExcepti

// Asking from a negative would give me all children, and set the watch
curWatchCount = dt.getWatchCount();
result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), countNodes, -1, 0, null);
result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), countNodes, -1, 0, nextPage);
assertEquals(countNodes, result.size());
assertEquals(ZooDefs.GetChildrenPaginated.lastPageMinCzxid, nextPage.getMinCzxid());
assertEquals(ZooDefs.GetChildrenPaginated.lastPageCzxidOffset, nextPage.getMinCzxidOffset());
assertEquals("The watch should have been set", curWatchCount + 1, dt.getWatchCount());
// Verify that the list is sorted
before = "";
Expand All @@ -350,26 +359,43 @@ public void testGetChildrenPaginated() throws NodeExistsException, NoNodeExcepti
before = path;
}

// Asking with maxReturned = MAX_INT would give me all children with no sorting, and set the watch
curWatchCount = dt.getWatchCount();
result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), Integer.MAX_VALUE, 0, 0, nextPage);
assertEquals(countNodes, result.size());
assertEquals(ZooDefs.GetChildrenPaginated.lastPageMinCzxid, nextPage.getMinCzxid());
assertEquals(ZooDefs.GetChildrenPaginated.lastPageCzxidOffset, nextPage.getMinCzxidOffset());
assertEquals("The watch should have been set", curWatchCount + 1, dt.getWatchCount());
// Verify that the list is not sorted
assertNotEquals("The returned list of children should not be sorted", childrenCreated, result);

// Asking from the last one should return only one node
curWatchCount = dt.getWatchCount();
result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 2, 1000 + countNodes - 1, 0, null);
result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 2, 1000 + countNodes - 1, 0, nextPage);
assertEquals(1, result.size());
assertEquals("test-" + (countNodes - 1), result.get(0));
assertEquals(ZooDefs.GetChildrenPaginated.lastPageMinCzxid, nextPage.getMinCzxid());
assertEquals(ZooDefs.GetChildrenPaginated.lastPageCzxidOffset, nextPage.getMinCzxidOffset());
assertEquals("The watch should have been set", curWatchCount + 1, dt.getWatchCount());

// Asking from the last created node+1 should return an empty list and set the watch
curWatchCount = dt.getWatchCount();
result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 2, 1000 + countNodes, 0, null);
result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 2, 1000 + countNodes, 0, nextPage);
assertTrue("The result should be an empty list", result.isEmpty());
assertEquals(ZooDefs.GetChildrenPaginated.lastPageMinCzxid, nextPage.getMinCzxid());
assertEquals(ZooDefs.GetChildrenPaginated.lastPageCzxidOffset, nextPage.getMinCzxidOffset());
assertEquals("The watch should have been set", curWatchCount + 1, dt.getWatchCount());

// Asking from -1 for one node should return two, and NOT set the watch
curWatchCount = dt.getWatchCount();
result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 1, -1, 0, null);
result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 1, -1, 0, nextPage);
assertEquals("No watch should be set", curWatchCount, dt.getWatchCount());
assertEquals("We only return up to ", 1, result.size());
// Check that we ordered correctly
assertEquals("test-0", result.get(0));
// Check next page info is returned correctly
assertEquals(firstCzxId + 1, nextPage.getMinCzxid());
assertEquals(0, nextPage.getMinCzxidOffset());
}

@Test(timeout = 60000)
Expand Down Expand Up @@ -398,8 +424,11 @@ public void testGetChildrenPaginatedWithOffset() throws NodeExistsException, NoN

// Asking from a negative would give me all children, and set the watch
int curWatchCount = dt.getWatchCount();
List<String> result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 1000, -1, 0, null);
PaginationNextPage nextPage = new PaginationNextPage();
List<String> result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 1000, -1, 0, nextPage);
assertEquals(allNodes, result.size());
assertEquals(ZooDefs.GetChildrenPaginated.lastPageMinCzxid, nextPage.getMinCzxid());
assertEquals(ZooDefs.GetChildrenPaginated.lastPageCzxidOffset, nextPage.getMinCzxidOffset());
assertEquals("The watch should have been set", curWatchCount + 1, dt.getWatchCount());
// Verify that the list is sorted
String before = "";
Expand All @@ -409,48 +438,70 @@ public void testGetChildrenPaginatedWithOffset() throws NodeExistsException, NoN
before = path;
}

// Asking with minCzxid = 0, offset = 0 should not skip anything.
// maxReturned = 1, the returned nextPage should be: minCzxid = next czxid, offset = 0.
curWatchCount = dt.getWatchCount();
result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 1, 0, 0, nextPage);
assertEquals(1, result.size());
assertEquals("test-0", result.get(0));
assertEquals(childrenCzxId, nextPage.getMinCzxid());
assertEquals(0, nextPage.getMinCzxidOffset());
assertEquals("The watch should not have been set", curWatchCount, dt.getWatchCount());

// Asking with offset minCzxId below childrenCzxId should not skip anything, regardless of offset
curWatchCount = dt.getWatchCount();
result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 2, childrenCzxId - 1, 3, null);
result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 2, childrenCzxId - 1, 3, nextPage);
assertEquals(2, result.size());
assertEquals("test-1", result.get(0));
assertEquals("test-2", result.get(1));
assertEquals(childrenCzxId, nextPage.getMinCzxid());
assertEquals(2, nextPage.getMinCzxidOffset());
assertEquals("The watch should not have been set", curWatchCount, dt.getWatchCount());

// Asking with offset 5 should skip nodes 1, 2, 3, 4, 5
curWatchCount = dt.getWatchCount();
result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 2, childrenCzxId, 5, null);
result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 2, childrenCzxId, 5, nextPage);
assertEquals(2, result.size());
assertEquals("test-6", result.get(0));
assertEquals("test-7", result.get(1));
assertEquals(childrenCzxId, nextPage.getMinCzxid());
assertEquals(7, nextPage.getMinCzxidOffset());
assertEquals("The watch should not have been set", curWatchCount, dt.getWatchCount());

// Asking with offset 5 for more nodes than are there should skip nodes 1, 2, 3, 4, 5 (plus 0 due to zxid)
curWatchCount = dt.getWatchCount();
result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 10, childrenCzxId, 5, null);
result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 10, childrenCzxId, 5, nextPage);

assertEquals(5, result.size());
assertEquals("test-6", result.get(0));
assertEquals("test-7", result.get(1));
assertEquals("test-8", result.get(2));
assertEquals("test-9", result.get(3));
assertEquals("test-999", result.get(4));
assertEquals(ZooDefs.GetChildrenPaginated.lastPageMinCzxid, nextPage.getMinCzxid());
assertEquals(ZooDefs.GetChildrenPaginated.lastPageCzxidOffset, nextPage.getMinCzxidOffset());
assertEquals("The watch should have been set", curWatchCount + 1, dt.getWatchCount());

// Asking with offset 5 for fewer nodes than are there should skip nodes 1, 2, 3, 4, 5 (plus 0 due to zxid)
// Returned next page should be: minCzxid = next czxid, offset = 0
curWatchCount = dt.getWatchCount();
result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 4, childrenCzxId, 5, null);
result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 4, childrenCzxId, 5, nextPage);

assertEquals(4, result.size());
assertEquals("test-6", result.get(0));
assertEquals("test-7", result.get(1));
assertEquals("test-8", result.get(2));
assertEquals("test-9", result.get(3));
assertEquals(childrenCzxId + 100, nextPage.getMinCzxid());
assertEquals(0, nextPage.getMinCzxidOffset());
assertEquals("The watch should not have been set", curWatchCount, dt.getWatchCount());

// Asking from the last created node+1 should return an empty list and set the watch
curWatchCount = dt.getWatchCount();
result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 2, 1000 + childrenCzxId, 0, null);
result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), 2, 1000 + childrenCzxId, 0, nextPage);
assertTrue("The result should be an empty list", result.isEmpty());
assertEquals(ZooDefs.GetChildrenPaginated.lastPageMinCzxid, nextPage.getMinCzxid());
assertEquals(ZooDefs.GetChildrenPaginated.lastPageCzxidOffset, nextPage.getMinCzxidOffset());
assertEquals("The watch should have been set", curWatchCount + 1, dt.getWatchCount());
}

Expand All @@ -469,7 +520,7 @@ public void testGetChildrenPaginatedEmpty() throws NodeExistsException, NoNodeEx
assertTrue("The result should be empty", result.isEmpty());
assertEquals("The watch should have been set", curWatchCount + 1, dt.getWatchCount());

// Specify max int to fetch all children - trying to return all all without sorting, and set the watch
// Specify max int to fetch all children - trying to return all without sorting, and set the watch
curWatchCount = dt.getWatchCount();
result = dt.getPaginatedChildren(rootPath, null, new DummyWatcher(), Integer.MAX_VALUE, 0, 0, null);
assertTrue("The result should be empty", result.isEmpty());
Expand Down

0 comments on commit dce1ae9

Please sign in to comment.