From 9787fb63216e3fe0cb8447eb6c73672b7bc8c2b1 Mon Sep 17 00:00:00 2001
From: Gavin Halliday
Date: Thu, 28 Mar 2024 17:37:24 +0000
Subject: [PATCH] HPCC-31539 Avoid unbounded index cache with slow remote storage

Signed-off-by: Gavin Halliday
---
 system/jhtree/jhtree.cpp | 28 ++++++++++++++++++++++------
 1 file changed, 22 insertions(+), 6 deletions(-)

diff --git a/system/jhtree/jhtree.cpp b/system/jhtree/jhtree.cpp
index d906ba1b34f..e051865a5a1 100644
--- a/system/jhtree/jhtree.cpp
+++ b/system/jhtree/jhtree.cpp
@@ -690,7 +690,7 @@ class CNodeMapping : public HTMapping
 };
 typedef OwningSimpleHashTableOf CNodeTable;
-class CNodeMRUCache : public CMRUCacheOf
+class CNodeMRUCache final : public CMRUCacheOf
 {
     std::atomic sizeInMem{0};
     size32_t memLimit = 0;
@@ -706,15 +706,31 @@ class CNodeMRUCache : public CMRUCacheOfqueryElement().isReady() )
-                break;
+            if (unlikely(!tail))
+                throw makeStringExceptionV(9999, "Index cache appears full but contains no entries size=%x limit=%x", sizeInMem.load(), memLimit);
+
+            //Never evict an entry that hasn't yet loaded - otherwise the sizeInMem can become inconsistent
+            //When running with slow remote storage this can take a long time to be ready - so we need
+            //to walk on to the next entry in the lrulist, otherwise we can run out of memory since nothing
+            //would be removed.
+            while (!tail->queryElement().isReady())
+            {
+                tail = tail->prev;
+                if (!tail)
+                {
+                    // no pages in the cache are ready - this could possibly happen in a tiny race-window where
+                    // sizes in the cache have been updated, but no nodes have yet been associated with the entries.
+                    return;
+                }
+            }
-            clear(1);
+            mruList.remove(tail);
+            table.removeExact(tail);
         }
         while (full());
     }