Skip to content

Commit

Permalink
HPCC-31539 Avoid unbounded index cache with slow remote storage
Browse files Browse the repository at this point in the history
Signed-off-by: Gavin Halliday <[email protected]>
  • Loading branch information
ghalliday committed Apr 2, 2024
1 parent 1358d22 commit 9787fb6
Showing 1 changed file with 22 additions and 6 deletions.
28 changes: 22 additions & 6 deletions system/jhtree/jhtree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -690,7 +690,7 @@ class CNodeMapping : public HTMapping<CNodeCacheEntry, CKeyIdAndPos>
};

typedef OwningSimpleHashTableOf<CNodeMapping, CKeyIdAndPos> CNodeTable;
class CNodeMRUCache : public CMRUCacheOf<CKeyIdAndPos, CNodeCacheEntry, CNodeMapping, CNodeTable>
class CNodeMRUCache final : public CMRUCacheOf<CKeyIdAndPos, CNodeCacheEntry, CNodeMapping, CNodeTable>
{
std::atomic<size32_t> sizeInMem{0};
size32_t memLimit = 0;
Expand All @@ -706,15 +706,31 @@ class CNodeMRUCache : public CMRUCacheOf<CKeyIdAndPos, CNodeCacheEntry, CNodeMap
virtual void makeSpace()
{
// remove LRU until !full
// This code could walk the list, rather than restarting at the end each time - but there are unlikely to be
// many entries that have no associated node, and nodes could have been associated in the meantime.
do
{
//Never evict an entry that hasn't yet loaded - otherwise the sizeInMem can become inconsistent
CNodeMapping *tail = mruList.tail();
assertex(tail);
if (!tail->queryElement().isReady() )
break;
if (unlikely(!tail))
throw makeStringExceptionV(9999, "Index cache appears full but contains no entries size=%x limit=%x", sizeInMem.load(), memLimit);

//Never evict an entry that hasn't yet loaded - otherwise the sizeInMem can become inconsistent
//When running with slow remote storage this can take a long time to be ready - so we need
//to walk on to the next entry in the lrulist, otherwise we can run out of memory since nothing
//would be removed.
while (!tail->queryElement().isReady())
{
tail = tail->prev;
if (!tail)
{
// no pages in the cache are ready - this could possibly happen in a tiny race-window where
// sizes in the cache have been updated, but no nodes have yet been associated with the entries.
return;
}
}

clear(1);
mruList.remove(tail);
table.removeExact(tail);
}
while (full());
}
Expand Down

0 comments on commit 9787fb6

Please sign in to comment.