From 74e14f8ea1adcb302c8e46b7e11a7788613ee3d3 Mon Sep 17 00:00:00 2001 From: Gavin Halliday Date: Sat, 12 Oct 2024 00:03:32 +0100 Subject: [PATCH 1/5] HPCC-32791 Partition the index LRU cache to reduce contention Signed-off-by: Gavin Halliday --- system/jhtree/jhtree.cpp | 180 +++++++++++++++++++++++++++++---------- system/jlib/jstatcodes.h | 2 + system/jlib/jstats.cpp | 2 + 3 files changed, 141 insertions(+), 43 deletions(-) diff --git a/system/jhtree/jhtree.cpp b/system/jhtree/jhtree.cpp index 956118e6faf..1d6933210d8 100644 --- a/system/jhtree/jhtree.cpp +++ b/system/jhtree/jhtree.cpp @@ -695,40 +695,19 @@ class DelayedCacheEntryReleaser : public IRemovedMappingCallback }; typedef OwningSimpleHashTableOf CNodeTable; -class CNodeMRUCache final : public CMRUCacheOf +class CNodeMRUSubCache final : public CMRUCacheOf { std::atomic sizeInMem{0}; size32_t memLimit = 0; - std::shared_ptr>> pNumHits = nullptr; - std::shared_ptr>> pNumAdds = nullptr; - std::shared_ptr>> pNumDups = nullptr; - std::shared_ptr>> pNumEvicts = nullptr; public: + mutable CriticalSection lock; RelaxedAtomic<__uint64> numHits{0}; RelaxedAtomic<__uint64> numAdds{0}; RelaxedAtomic<__uint64> numDups{0}; RelaxedAtomic<__uint64> numEvicts{0}; - bool enabled = false; - CNodeMRUCache(CacheType cacheType) - { - StringBuffer name, desc; - const char *typeText = cacheTypeText[cacheType]; - name.appendf("jhtree.cache.%s.hits", typeText); - desc.appendf("The number of cache hits on the jhtree %s cache", typeText); - pNumHits = hpccMetrics::registerCustomMetric(name, desc, hpccMetrics::METRICS_COUNTER, numHits, SMeasureCount); - name.clear().appendf("jhtree.cache.%s.adds", typeText); - desc.clear().appendf("The number of cache adds to the jhtree %s cache", typeText); - pNumAdds = hpccMetrics::registerCustomMetric(name, desc, hpccMetrics::METRICS_COUNTER, numAdds, SMeasureCount); - name.clear().appendf("jhtree.cache.%s.dups", typeText); - desc.clear().appendf("The number of cache add collisions on the jhtree %s cache", typeText); - pNumDups = hpccMetrics::registerCustomMetric(name, desc, hpccMetrics::METRICS_COUNTER, numDups, SMeasureCount); - name.clear().appendf("jhtree.cache.%s.evictions", typeText); - desc.clear().appendf("The number of nodes evicted from the jhtree %s cache", typeText); - pNumEvicts = hpccMetrics::registerCustomMetric(name, desc, hpccMetrics::METRICS_COUNTER, numEvicts, SMeasureCount); - } + size32_t setMemLimit(size32_t _memLimit) { - enabled = _memLimit != 0; size32_t oldMemLimit = memLimit; memLimit = _memLimit; if (full()) @@ -784,7 +763,7 @@ class CNodeMRUCache final : public CMRUCacheOf iter = getIterator(); + Owned iter = getIterator(); ForEach(*iter) { CNodeMapping &mapping = iter->query(); @@ -804,24 +783,127 @@ class CNodeMRUCache final : public CMRUCacheOf pMetric = std::shared_ptr(new CacheMetric(name, desc, kind, *this)); + hpccMetrics::queryMetricsManager().addMetric(pMetric); + metrics.push_back(std::move(pMetric)); + } + +public: + CNodeMRUCache(CacheType cacheType=CacheBranch) + { + StringBuffer name, desc; + const char *typeText = cacheTypeText[cacheType]; + name.appendf("jhtree.cache.%s.hits", typeText); + desc.appendf("The number of cache hits on the jhtree %s cache", typeText); + createCacheMetric(name, desc, StNumCacheHits); + name.clear().appendf("jhtree.cache.%s.adds", typeText); + desc.clear().appendf("The number of cache adds to the jhtree %s cache", typeText); + createCacheMetric(name, desc, StNumCacheAdds); + name.clear().appendf("jhtree.cache.%s.dups", typeText); + desc.clear().appendf("The number of cache add collisions on the jhtree %s cache", typeText); + createCacheMetric(name, desc, StNumCacheDuplicates); + name.clear().appendf("jhtree.cache.%s.evictions", typeText); + desc.clear().appendf("The number of nodes evicted from the jhtree %s cache", typeText); + createCacheMetric(name, desc, StNumCacheEvictions); + } + + __uint64 getStatisticValue(StatisticKind kind) const + { + __uint64 total = 0; + for (unsigned i =0; i < cacheBuckets; i++) + total += cache[i].getStatisticValue(kind); + return total; + } + + size32_t setCacheMem(size32_t newSize) + { + unsigned oldV = 0; + for (unsigned i=0; i < cacheBuckets; i++) + { + CriticalBlock block(cache[i].lock); + oldV += cache[i].setMemLimit(newSize/cacheBuckets); + } + enabled = newSize != 0; + return oldV; + } + + void traceState(StringBuffer & out) + { if (enabled) { - out.append(table.ordinality()).append(":").append(sizeInMem); - out.appendf(" [%" I64F "u:%" I64F "u:%" I64F "u:%" I64F "u]", numHits.load(), numAdds.load(), numDups.load(), numEvicts.load()); + for (unsigned j=0; j < cacheBuckets; j++) + { + if (j) + out.append(' '); + cache[j].traceState(out); + } } else { out.append("[disabled]"); } } + CNodeMRUSubCache cache[cacheBuckets]; + bool enabled; +protected: + std::vector> metrics; }; - class CNodeCache : public CInterface { private: - mutable CriticalSection lock[CacheMax]; - CNodeMRUCache cache[CacheMax] = { CacheBranch, CacheLeaf, CacheBlob }; + CNodeMRUCache cache[CacheMax]; + std::vector> metrics; public: CNodeCache(size32_t maxNodeMem, size32_t maxLeaveMem, size32_t maxBlobMem) { @@ -849,8 +931,11 @@ class CNodeCache : public CInterface { for (unsigned i=0; i < CacheMax; i++) { - CriticalBlock block(lock[i]); - cache[i].kill(); + for (unsigned j=0; j < cacheBuckets; j++) + { + CriticalBlock block(cache[i].cache[j].lock); + cache[i].cache[j].kill(); + } } } void traceState(StringBuffer & out) @@ -858,7 +943,12 @@ class CNodeCache : public CInterface for (unsigned i=0; i < CacheMax; i++) { out.append(cacheTypeText[i]).append('('); - cache[i].traceState(out); + for (unsigned j=0; j < cacheBuckets; j++) + { + if (j) + out.append(' '); + cache[i].cache[j].traceState(out); + } out.append(") "); } } @@ -872,9 +962,7 @@ class CNodeCache : public CInterface protected: size32_t setCacheMem(size32_t newSize, CacheType type) { - CriticalBlock block(lock[type]); - unsigned oldV = cache[type].setMemLimit(newSize); - return oldV; + return cache[type].setCacheMem(newSize); } }; @@ -2662,8 +2750,11 @@ void CNodeCache::getCacheInfo(ICacheInfoRecorder &cacheInfo) { for (unsigned i = 0; i < CacheMax; i++) { - CriticalBlock block(lock[i]); - cache[i].reportEntries(cacheInfo); + for (unsigned j = 0; j < cacheBuckets; j++) + { + CriticalBlock block(cache[i].cache[j].lock); + cache[i].cache[j].reportEntries(cacheInfo); + } } } @@ -2687,23 +2778,26 @@ const CJHTreeNode *CNodeCache::getCachedNode(const INodeLoader *keyIndex, unsign CacheType cacheType = isTLK ? CacheBranch : (CacheType)type; // check cacheEnabled[cacheType] avoid the critical section (and testing the flag within the critical section) - CNodeMRUCache & curCache = cache[cacheType]; - if (unlikely(!curCache.enabled)) + CNodeMRUCache & typeCache = cache[cacheType]; + if (unlikely(!typeCache.enabled)) return keyIndex->loadNode(nullptr, pos, nullptr); + CKeyIdAndPos key(iD, pos); + unsigned hashcode = typeCache.cache[0].getKeyHash(key); // more: move getKeyHash into typeCache to clean this up + unsigned subCache = cacheBits == 0 ? 0 : hashcode >> cacheShift; + CNodeMRUSubCache & curCache = cache[cacheType].cache[subCache]; + //Previously, this was implemented as: // Lock, unlock. Load the page. Lock, check if it has been added, otherwise add. //Now, it is coded as: // Lock, add if missing, unlock. Lock a page-dependent-cr load() release lock. //There will be the same number of critical section locks, but loading a page will contend on a different lock - so it should reduce contention. - CKeyIdAndPos key(iD, pos); - CriticalSection & cacheLock = lock[cacheType]; + CriticalSection & cacheLock = curCache.lock; Owned ownedCacheEntry; // ensure node gets cleaned up if it fails to load bool alreadyExists = true; { DelayedCacheEntryReleaser delayedReleaser; CNodeCacheEntry * cacheEntry; - unsigned hashcode = curCache.getKeyHash(key); CLeavableCriticalBlock block(cacheLock); cacheEntry = curCache.query(hashcode, &key); diff --git a/system/jlib/jstatcodes.h b/system/jlib/jstatcodes.h index 0c8be6186c2..845d1d08ba5 100644 --- a/system/jlib/jstatcodes.h +++ b/system/jlib/jstatcodes.h @@ -324,6 +324,8 @@ enum StatisticKind StNumCacheHits, // Generic 'cache hit' stats, potentially used for many caches; Roxie-specific stats are above StNumCacheAdds, // Generic 'cache add/miss' stats, potentially used for many caches; Roxie-specific stats are above StNumPeakCacheObjects, // Peak number of objects in a generic cache + StNumCacheDuplicates, + StNumCacheEvictions, StMax, //For any quantity there is potentially the following variants. diff --git a/system/jlib/jstats.cpp b/system/jlib/jstats.cpp index 49d01e9d3b7..99b5844ed10 100644 --- a/system/jlib/jstats.cpp +++ b/system/jlib/jstats.cpp @@ -997,6 +997,8 @@ static const constexpr StatisticMeta statsMetaData[StMax] = { { NUMSTAT(CacheHits), "The number of times an item was retrieved from a cache" }, { NUMSTAT(CacheAdds), "The number of times an item was added to a cache" }, { PEAKNUMSTAT(PeakCacheObjects), "High water mark for number of objects in a cache"}, + { NUMSTAT(CacheDuplicates), "The number of times an item was added to a cache by two threads at the same time" }, + { NUMSTAT(CacheEvictions), "The number of times an item was evicted from a cache" }, }; static MapStringTo statisticNameMap(true); From 9a4801d1a4950c693db0bb682469e9e669c3593d Mon Sep 17 00:00:00 2001 From: Gavin Halliday Date: Thu, 17 Oct 2024 11:11:52 +0100 Subject: [PATCH 2/5] cleanup Signed-off-by: Gavin Halliday --- system/jhtree/jhtree.cpp | 31 ++++++++++++++++--------------- system/jhtree/jhutil.hpp | 2 +- 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/system/jhtree/jhtree.cpp b/system/jhtree/jhtree.cpp index 1d6933210d8..1c38716af23 100644 --- a/system/jhtree/jhtree.cpp +++ b/system/jhtree/jhtree.cpp @@ -763,6 +763,7 @@ class CNodeMRUSubCache final : public CMRUCacheOf iter = getIterator(); ForEach(*iter) { @@ -857,6 +858,11 @@ class CNodeMRUCache createCacheMetric(name, desc, StNumCacheEvictions); } + inline unsigned getKeyHash(const CKeyIdAndPos & key) + { + return cache[0].getKeyHash(key); + } + __uint64 getStatisticValue(StatisticKind kind) const { __uint64 total = 0; @@ -865,6 +871,12 @@ class CNodeMRUCache return total; } + void reportEntries(ICacheInfoRecorder &cacheInfo) + { + for (unsigned j = 0; j < cacheBuckets; j++) + cache[j].reportEntries(cacheInfo); + } + size32_t setCacheMem(size32_t newSize) { unsigned oldV = 0; @@ -943,12 +955,7 @@ class CNodeCache : public CInterface for (unsigned i=0; i < CacheMax; i++) { out.append(cacheTypeText[i]).append('('); - for (unsigned j=0; j < cacheBuckets; j++) - { - if (j) - out.append(' '); - cache[i].cache[j].traceState(out); - } + cache[i].traceState(out); out.append(") "); } } @@ -2749,13 +2756,7 @@ extern jhtree_decl void getNodeCacheInfo(ICacheInfoRecorder &cacheInfo) void CNodeCache::getCacheInfo(ICacheInfoRecorder &cacheInfo) { for (unsigned i = 0; i < CacheMax; i++) - { - for (unsigned j = 0; j < cacheBuckets; j++) - { - CriticalBlock block(cache[i].cache[j].lock); - cache[i].cache[j].reportEntries(cacheInfo); - } - } + cache[i].reportEntries(cacheInfo); } //Use a critical section in each node to prevent multiple threads loading the same node at the same time. @@ -2783,9 +2784,9 @@ const CJHTreeNode *CNodeCache::getCachedNode(const INodeLoader *keyIndex, unsign return keyIndex->loadNode(nullptr, pos, nullptr); CKeyIdAndPos key(iD, pos); - unsigned hashcode = typeCache.cache[0].getKeyHash(key); // more: move getKeyHash into typeCache to clean this up + unsigned hashcode = typeCache.getKeyHash(key); unsigned subCache = cacheBits == 0 ? 0 : hashcode >> cacheShift; - CNodeMRUSubCache & curCache = cache[cacheType].cache[subCache]; + CNodeMRUSubCache & curCache = typeCache.cache[subCache]; //Previously, this was implemented as: // Lock, unlock. Load the page. Lock, check if it has been added, otherwise add. diff --git a/system/jhtree/jhutil.hpp b/system/jhtree/jhutil.hpp index 5ca47c4cbbe..0b9d84b6d00 100644 --- a/system/jhtree/jhutil.hpp +++ b/system/jhtree/jhutil.hpp @@ -81,7 +81,7 @@ class CMRUCacheOf : public CInterface//, public IInterface table.replace(*mapping); mruList.enqueueHead(mapping); } - unsigned getKeyHash(KEY & key) const + unsigned getKeyHash(const KEY & key) const { return table.getHashFromFindParam(&key); } From 8807765a4a211fcbef5c001e340226398a3101c8 Mon Sep 17 00:00:00 2001 From: Gavin Halliday Date: Thu, 17 Oct 2024 11:18:59 +0100 Subject: [PATCH 3/5] Othe code cleanup - could push to a different PR Signed-off-by: Gavin Halliday --- system/jhtree/jhtree.cpp | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/system/jhtree/jhtree.cpp b/system/jhtree/jhtree.cpp index 1c38716af23..b04eafdd640 100644 --- a/system/jhtree/jhtree.cpp +++ b/system/jhtree/jhtree.cpp @@ -2837,20 +2837,14 @@ const CJHTreeNode *CNodeCache::getCachedNode(const INodeLoader *keyIndex, unsign try { //Move the atomic increments out of the critical section - they can be relatively expensive - if (likely(alreadyExists)) + if (likely(ctx)) { - if (ctx) ctx->noteStatistic(hitStatId[cacheType], 1); - } - else - { - if (ctx) ctx->noteStatistic(addStatId[cacheType], 1); + if (unlikely(alreadyExists)) + ctx->noteStatistic(hitStatId[cacheType], 1); + else + ctx->noteStatistic(addStatId[cacheType], 1); } - //The common case is that this flag has already been set (by a previous add). - if (likely(ownedCacheEntry->isReady())) - return ownedCacheEntry->getNode(); - - //Shame that the hash code is recalculated - it might be possible to remove this. cycle_t startCycles = get_cycles_now(); cycle_t fetchCycles = 0; cycle_t startLoadCycles; From ab5cfc69b1436570e9a1f247542559ab38db6e65 Mon Sep 17 00:00:00 2001 From: Gavin Halliday Date: Thu, 17 Oct 2024 12:42:51 +0100 Subject: [PATCH 4/5] Metrics not initialised correctly Signed-off-by: Gavin Halliday --- system/jhtree/jhtree.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/system/jhtree/jhtree.cpp b/system/jhtree/jhtree.cpp index b04eafdd640..806dee57e07 100644 --- a/system/jhtree/jhtree.cpp +++ b/system/jhtree/jhtree.cpp @@ -840,7 +840,7 @@ class CNodeMRUCache } public: - CNodeMRUCache(CacheType cacheType=CacheBranch) + CNodeMRUCache(CacheType cacheType) { StringBuffer name, desc; const char *typeText = cacheTypeText[cacheType]; @@ -914,7 +914,7 @@ class CNodeMRUCache class CNodeCache : public CInterface { private: - CNodeMRUCache cache[CacheMax]; + CNodeMRUCache cache[CacheMax] = { CacheBranch, CacheLeaf, CacheBlob }; std::vector> metrics; public: CNodeCache(size32_t maxNodeMem, size32_t maxLeaveMem, size32_t maxBlobMem) From ba9a0c84b5c351644b7d0f2e4d170e06948d2b44 Mon Sep 17 00:00:00 2001 From: Gavin Halliday Date: Tue, 22 Oct 2024 14:31:14 +0100 Subject: [PATCH 5/5] Add clarifying comment Signed-off-by: Gavin Halliday --- system/jhtree/jhtree.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/system/jhtree/jhtree.cpp b/system/jhtree/jhtree.cpp index 806dee57e07..bca8114a58e 100644 --- a/system/jhtree/jhtree.cpp +++ b/system/jhtree/jhtree.cpp @@ -2839,6 +2839,8 @@ const CJHTreeNode *CNodeCache::getCachedNode(const INodeLoader *keyIndex, unsign //Move the atomic increments out of the critical section - they can be relatively expensive if (likely(ctx)) { + //It is most likely that an item has been added - because if the entry already exists + //it will only reach this point if another thread has not already loaded the node. if (unlikely(alreadyExists)) ctx->noteStatistic(hitStatId[cacheType], 1); else