Skip to content

Commit

Permalink
Reduce the number of lists used for dense spans in the HugePageFiller
Browse files Browse the repository at this point in the history
This is an experimental change.  We know that dense spans only use one TCMalloc
page each.  So hugepages with dense spans do not need to maintain the longest
free range and chunks to pick the best possible candidate for a new span
allocation.  In this change, we use a kPagesPerHugePage-length array of linked
lists for holding hugepages with dense spans.  Index 0 holds hugepages with all
pages allocated, index 1 holds hugepages with kPagesPerHugePage-1 pages
allocated, and so on.

PiperOrigin-RevId: 666047833
Change-Id: I58daa5678b06c0f25338e4e21a825d5c4f5268f5
  • Loading branch information
nilayvaish authored and copybara-github committed Aug 21, 2024
1 parent 3540420 commit 37270f1
Show file tree
Hide file tree
Showing 10 changed files with 1,598 additions and 414 deletions.
2 changes: 2 additions & 0 deletions tcmalloc/experiment_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ enum class Experiment : int {
TCMALLOC_RESIZE_SIZE_CLASS_MAX_CAPACITY, // TODO(b/123345734): Complete experiment.
TEST_ONLY_TCMALLOC_BIG_SPAN, // TODO(b/304135905): Complete experiment.
TEST_ONLY_L3_AWARE, // TODO(b/239977380): Complete experiment.
TEST_ONLY_TCMALLOC_DENSE_TRACKERS_SORTED_ON_SPANS_ALLOCATED, // TODO(b/348043731): Complete experiment.
kMaxExperimentID,
// clang-format on
};
Expand All @@ -51,6 +52,7 @@ inline constexpr ExperimentConfig experiments[] = {
{Experiment::TCMALLOC_RESIZE_SIZE_CLASS_MAX_CAPACITY, "TCMALLOC_RESIZE_SIZE_CLASS_MAX_CAPACITY"},
{Experiment::TEST_ONLY_TCMALLOC_BIG_SPAN, "TEST_ONLY_TCMALLOC_BIG_SPAN"},
{Experiment::TEST_ONLY_L3_AWARE, "TEST_ONLY_L3_AWARE"},
{Experiment::TEST_ONLY_TCMALLOC_DENSE_TRACKERS_SORTED_ON_SPANS_ALLOCATED, "TEST_ONLY_TCMALLOC_DENSE_TRACKERS_SORTED_ON_SPANS_ALLOCATED"},
};
// clang-format on

Expand Down
5 changes: 5 additions & 0 deletions tcmalloc/global_stats.cc
Original file line number Diff line number Diff line change
Expand Up @@ -565,6 +565,9 @@ void DumpStats(Printer* out, int level) {
out->printf("PARAMETER madvise %s\n", MadviseString());
out->printf("PARAMETER tcmalloc_resize_size_class_max_capacity %d\n",
Parameters::resize_size_class_max_capacity() ? 1 : 0);
out->printf(
"PARAMETER tcmalloc_dense_trackers_sorted_on_spans_allocated %d\n",
Parameters::dense_trackers_sorted_on_spans_allocated() ? 1 : 0);
}
}

Expand Down Expand Up @@ -757,6 +760,8 @@ void DumpStatsInPbtxt(Printer* out, int level) {
region.PrintBool("tcmalloc_configure_size_class_max_capacity",
tc_globals.cpu_cache().ConfigureSizeClassMaxCapacity());
region.PrintI64("span_max_cache_size", Parameters::max_span_cache_size());
region.PrintBool("tcmalloc_dense_trackers_sorted_on_spans_allocated",
Parameters::dense_trackers_sorted_on_spans_allocated());

region.PrintRaw(
"size_class_config",
Expand Down
6 changes: 5 additions & 1 deletion tcmalloc/huge_page_aware_allocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,10 @@ class StaticForwarder {
// Construction-time configuration for HugePageAwareAllocator.  Each field
// defaults from a process-wide setting (huge_region_option() / Parameters),
// so tests and fuzzers can override individual knobs explicitly.
struct HugePageAwareAllocatorOptions {
// Memory tag for the pages managed by this allocator.
MemoryTag tag;
// Whether to use HugeRegion for more (all large) allocations.
HugeRegionUsageOption use_huge_region_more_often = huge_region_option();
// How the HugePageFiller sorts hugepages backing densely-accessed spans.
// kSpansAllocated is the experimental mode in which dense spans are
// single-page and trackers are sorted only on the number of spans allocated;
// the default follows the dense_trackers_sorted_on_spans_allocated
// parameter (experiment-controlled).
HugePageFillerDenseTrackerType dense_tracker_type =
Parameters::dense_trackers_sorted_on_spans_allocated()
? HugePageFillerDenseTrackerType::kSpansAllocated
: HugePageFillerDenseTrackerType::kLongestFreeRangeAndChunks;
// How long HugeCache holds free hugepages before releasing them.
absl::Duration huge_cache_time = Parameters::huge_cache_release_time();
};

Expand Down Expand Up @@ -396,7 +400,7 @@ inline HugePageAwareAllocator<Forwarder>::HugePageAwareAllocator(
: PageAllocatorInterface("HugePageAware", options.tag),
unback_(*this),
unback_without_lock_(*this),
filler_(unback_, unback_without_lock_),
filler_(options.dense_tracker_type, unback_, unback_without_lock_),
regions_(options.use_huge_region_more_often),
vm_allocator_(*this),
metadata_allocator_(*this),
Expand Down
23 changes: 20 additions & 3 deletions tcmalloc/huge_page_aware_allocator_fuzz.cc
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include "absl/time/time.h"
#include "tcmalloc/common.h"
#include "tcmalloc/huge_page_aware_allocator.h"
#include "tcmalloc/huge_page_filler.h"
#include "tcmalloc/huge_pages.h"
#include "tcmalloc/huge_region.h"
#include "tcmalloc/internal/logging.h"
Expand Down Expand Up @@ -84,7 +85,7 @@ void FuzzHPAA(const std::string& s) {
// [1] - HugeRegionsMode.
// [2] - HugeCache release time
// [3:4] - Reserved.
// [5] - (available)
// [5] - Dense tracker type
// [6:12] - Reserved.
//
// TODO(b/271282540): Convert these to strongly typed fuzztest parameters.
Expand Down Expand Up @@ -113,6 +114,11 @@ void FuzzHPAA(const std::string& s) {
? HugeRegionUsageOption::kDefault
: HugeRegionUsageOption::kUseForAllLargeAllocs;

const HugePageFillerDenseTrackerType dense_tracker_type =
static_cast<uint8_t>(data[5]) >= 128
? HugePageFillerDenseTrackerType::kLongestFreeRangeAndChunks
: HugePageFillerDenseTrackerType::kSpansAllocated;

const int32_t huge_cache_release_s = std::max<int32_t>(data[2], 1);

// data[6:12] - Reserve additional bytes for any features we might want to add
Expand All @@ -128,6 +134,7 @@ void FuzzHPAA(const std::string& s) {
options.tag = tag;
options.use_huge_region_more_often = huge_region_option;
options.huge_cache_time = absl::Seconds(huge_cache_release_s);
options.dense_tracker_type = dense_tracker_type;
HugePageAwareAllocator<FakeStaticForwarderWithUnback>* allocator;
allocator =
new (p) HugePageAwareAllocator<FakeStaticForwarderWithUnback>(options);
Expand Down Expand Up @@ -162,8 +169,8 @@ void FuzzHPAA(const std::string& s) {
// value[48] - Should we use aligned allocate?
// value[49] - Is the span sparsely- or densely-accessed?
// value[63:50] - Reserved.
const Length length(std::clamp<size_t>(
value & 0xFFFF, 1, kPagesPerHugePage.raw_num() - 1));
Length length(std::clamp<size_t>(value & 0xFFFF, 1,
kPagesPerHugePage.raw_num() - 1));
size_t num_objects = std::max<size_t>((value >> 16) & 0xFFFF, 1);
size_t object_size = length.in_bytes() / num_objects;
const bool use_aligned = ((value >> 48) & 0x1) == 0;
Expand All @@ -186,6 +193,11 @@ void FuzzHPAA(const std::string& s) {
// This is an invalid size class, so skip it.
break;
}
if (dense_tracker_type ==
HugePageFillerDenseTrackerType::kSpansAllocated &&
density == AccessDensityPrediction::kDense) {
length = Length(1);
}

// Allocation is too big for filler if we try to allocate >
// kPagesPerHugePage / 2 run of pages. The allocations may go to
Expand All @@ -198,6 +210,11 @@ void FuzzHPAA(const std::string& s) {
Span* s;
SpanAllocInfo alloc_info = {.objects_per_span = num_objects,
.density = density};
TC_CHECK(
dense_tracker_type ==
HugePageFillerDenseTrackerType::kLongestFreeRangeAndChunks ||
density == AccessDensityPrediction::kSparse ||
length == Length(1));
if (use_aligned) {
s = allocator->NewAligned(length, align, alloc_info);
} else {
Expand Down
90 changes: 63 additions & 27 deletions tcmalloc/huge_page_filler.h
Original file line number Diff line number Diff line change
Expand Up @@ -244,18 +244,30 @@ struct HugePageFillerStats {
HugeLength n_partial[AccessDensityPrediction::kPredictionCounts + 1];
};

// Selects how the HugePageFiller orders the page trackers that hold
// densely-accessed spans.  The underlying type is bool because exactly two
// policies exist.
enum class HugePageFillerDenseTrackerType : bool {
// Hugepages sorted on longest free range and chunk index. This is currently
// the default.
kLongestFreeRangeAndChunks,
// Hugepages sorted only on number of spans allocated. As we allocate
// single-page many-object spans, we do not sort hugepages on longest free
// range when this configuration is used.
kSpansAllocated,
};

// This tracks a set of unfilled hugepages, and fulfills allocations
// with a goal of filling some hugepages as tightly as possible and emptying
// out the remainder.
template <class TrackerType>
class HugePageFiller {
public:
explicit HugePageFiller(
HugePageFillerDenseTrackerType dense_tracker_type,
MemoryModifyFunction& unback ABSL_ATTRIBUTE_LIFETIME_BOUND,
MemoryModifyFunction& unback_without_lock ABSL_ATTRIBUTE_LIFETIME_BOUND);
HugePageFiller(
Clock clock, MemoryModifyFunction& unback ABSL_ATTRIBUTE_LIFETIME_BOUND,
MemoryModifyFunction& unback_without_lock ABSL_ATTRIBUTE_LIFETIME_BOUND);
HugePageFiller(Clock clock, HugePageFillerDenseTrackerType dense_tracker_type,
MemoryModifyFunction& unback ABSL_ATTRIBUTE_LIFETIME_BOUND,
MemoryModifyFunction& unback_without_lock
ABSL_ATTRIBUTE_LIFETIME_BOUND);

typedef TrackerType Tracker;

Expand Down Expand Up @@ -405,7 +417,8 @@ class HugePageFiller {
// pt has a single allocation.
size_t IndexFor(TrackerType* pt) const;
// Returns index for regular_alloc_.
size_t ListFor(Length longest, size_t chunk) const;
size_t ListFor(Length longest, size_t chunk, AccessDensityPrediction density,
size_t nallocs) const;
static constexpr size_t kNumLists = kPagesPerHugePage.raw_num() * kChunks;

// List of hugepages from which no pages have been released to the OS.
Expand Down Expand Up @@ -433,6 +446,7 @@ class HugePageFiller {
// n_used_partial_released_ is the number of pages which have been allocated
// from the hugepages in the set regular_alloc_partial_released.
Length n_used_partial_released_[AccessDensityPrediction::kPredictionCounts];
const HugePageFillerDenseTrackerType dense_tracker_type_;

// RemoveFromFillerList pt from the appropriate PageTrackerList.
void RemoveFromFillerList(TrackerType* pt);
Expand Down Expand Up @@ -624,17 +638,19 @@ inline Length PageTracker::free_pages() const {

template <class TrackerType>
inline HugePageFiller<TrackerType>::HugePageFiller(
HugePageFillerDenseTrackerType dense_tracker_type,
MemoryModifyFunction& unback, MemoryModifyFunction& unback_without_lock)
: HugePageFiller(Clock{.now = absl::base_internal::CycleClock::Now,
.freq = absl::base_internal::CycleClock::Frequency},
unback, unback_without_lock) {}
dense_tracker_type, unback, unback_without_lock) {}

// For testing with mock clock
template <class TrackerType>
inline HugePageFiller<TrackerType>::HugePageFiller(
Clock clock, MemoryModifyFunction& unback,
MemoryModifyFunction& unback_without_lock)
: size_(NHugePages(0)),
Clock clock, HugePageFillerDenseTrackerType dense_tracker_type,
MemoryModifyFunction& unback, MemoryModifyFunction& unback_without_lock)
: dense_tracker_type_(dense_tracker_type),
size_(NHugePages(0)),
fillerstats_tracker_(clock, absl::Minutes(10), absl::Minutes(5)),
clock_(clock),
unback_(unback),
Expand All @@ -644,6 +660,10 @@ template <class TrackerType>
inline typename HugePageFiller<TrackerType>::TryGetResult
HugePageFiller<TrackerType>::TryGet(Length n, SpanAllocInfo span_alloc_info) {
TC_ASSERT_GT(n, Length(0));
TC_ASSERT(dense_tracker_type_ ==
HugePageFillerDenseTrackerType::kLongestFreeRangeAndChunks ||
span_alloc_info.density == AccessDensityPrediction::kSparse ||
n == Length(1));

// How do we choose which hugepage to allocate from (among those with
// a free range of at least n?) Our goal is to be as space-efficient
Expand Down Expand Up @@ -715,7 +735,8 @@ HugePageFiller<TrackerType>::TryGet(Length n, SpanAllocInfo span_alloc_info) {
bool was_released = false;
const AccessDensityPrediction type = span_alloc_info.density;
do {
pt = regular_alloc_[type].GetLeast(ListFor(n, 0));
pt = regular_alloc_[type].GetLeast(
ListFor(n, 0, type, kPagesPerHugePage.raw_num() - 1));
if (pt) {
TC_ASSERT(!pt->donated());
break;
Expand All @@ -726,15 +747,17 @@ HugePageFiller<TrackerType>::TryGet(Length n, SpanAllocInfo span_alloc_info) {
break;
}
}
pt = regular_alloc_partial_released_[type].GetLeast(ListFor(n, 0));
pt = regular_alloc_partial_released_[type].GetLeast(
ListFor(n, 0, type, kPagesPerHugePage.raw_num() - 1));
if (pt) {
TC_ASSERT(!pt->donated());
was_released = true;
TC_ASSERT_GE(n_used_partial_released_[type], pt->used_pages());
n_used_partial_released_[type] -= pt->used_pages();
break;
}
pt = regular_alloc_released_[type].GetLeast(ListFor(n, 0));
pt = regular_alloc_released_[type].GetLeast(
ListFor(n, 0, type, kPagesPerHugePage.raw_num() - 1));
if (pt) {
TC_ASSERT(!pt->donated());
was_released = true;
Expand Down Expand Up @@ -1518,16 +1541,18 @@ class UsageInfo {
template <class TrackerType>
inline HugePageFillerStats HugePageFiller<TrackerType>::GetStats() const {
HugePageFillerStats stats;

// note kChunks, not kNumLists here--we're iterating *full* lists.
// Note kChunks, not kNumLists here--we're iterating *full* lists.
for (size_t chunk = 0; chunk < kChunks; ++chunk) {
stats.n_full[AccessDensityPrediction::kSparse] +=
NHugePages(regular_alloc_[AccessDensityPrediction::kSparse]
[ListFor(/*longest=*/Length(0), chunk)]
.length());
stats.n_full[AccessDensityPrediction::kSparse] += NHugePages(
regular_alloc_[AccessDensityPrediction::kSparse]
[ListFor(/*longest=*/Length(0), chunk,
AccessDensityPrediction::kSparse, /*nallocs=*/0)]
.length());
stats.n_full[AccessDensityPrediction::kDense] +=
NHugePages(regular_alloc_[AccessDensityPrediction::kDense]
[ListFor(/*longest=*/Length(0), chunk)]
[ListFor(/*longest=*/Length(0), chunk,
AccessDensityPrediction::kDense,
kPagesPerHugePage.raw_num())]
.length());
}
stats.n_full[AccessDensityPrediction::kPredictionCounts] =
Expand Down Expand Up @@ -1821,7 +1846,6 @@ inline void HugePageFiller<TrackerType>::PrintInPbtxt(PbtxtRegion* hpaa) const {
0);

usage.Print(hpaa);

fillerstats_tracker_.PrintSubreleaseStatsInPbtxt(hpaa,
"filler_skipped_subrelease");
fillerstats_tracker_.PrintTimeseriesStatsInPbtxt(hpaa,
Expand Down Expand Up @@ -1877,11 +1901,25 @@ inline size_t HugePageFiller<TrackerType>::IndexFor(TrackerType* pt) const {
}

template <class TrackerType>
inline size_t HugePageFiller<TrackerType>::ListFor(const Length longest,
const size_t chunk) const {
inline size_t HugePageFiller<TrackerType>::ListFor(
const Length longest, const size_t chunk,
const AccessDensityPrediction density, size_t nallocs) const {
TC_ASSERT_LT(chunk, kChunks);
TC_ASSERT_LT(longest, kPagesPerHugePage);
return longest.raw_num() * kChunks + chunk;
if (ABSL_PREDICT_TRUE(
density == AccessDensityPrediction::kSparse ||
dense_tracker_type_ ==
HugePageFillerDenseTrackerType::kLongestFreeRangeAndChunks)) {
TC_ASSERT_LT(longest, kPagesPerHugePage);
return longest.raw_num() * kChunks + chunk;
}
TC_ASSERT(density == AccessDensityPrediction::kDense);
TC_ASSERT(dense_tracker_type_ ==
HugePageFillerDenseTrackerType::kSpansAllocated);
TC_ASSERT_LE(nallocs, kPagesPerHugePage.raw_num());
// For the dense tracker with hugepages sorted on allocs, the hugepages are
// placed only in lists that are multiples of kChunks. The in-between lists
// are empty.
return (kPagesPerHugePage.raw_num() - nallocs) * kChunks + chunk;
}

template <class TrackerType>
Expand All @@ -1894,12 +1932,11 @@ inline void HugePageFiller<TrackerType>::RemoveFromFillerList(TrackerType* pt) {
return;
}

size_t chunk = IndexFor(pt);
size_t i = ListFor(longest, chunk);
const AccessDensityPrediction type =
pt->HasDenseSpans()
? AccessDensityPrediction::kDense
: AccessDensityPrediction::kSparse;
size_t i = ListFor(longest, IndexFor(pt), type, pt->nallocs());

if (!pt->released()) {
regular_alloc_[type].Remove(pt, i);
Expand All @@ -1916,7 +1953,6 @@ inline void HugePageFiller<TrackerType>::RemoveFromFillerList(TrackerType* pt) {

template <class TrackerType>
inline void HugePageFiller<TrackerType>::AddToFillerList(TrackerType* pt) {
size_t chunk = IndexFor(pt);
Length longest = pt->longest_free_range();
TC_ASSERT_LT(longest, kPagesPerHugePage);

Expand All @@ -1926,11 +1962,11 @@ inline void HugePageFiller<TrackerType>::AddToFillerList(TrackerType* pt) {
// donated allocs.
pt->set_donated(false);

size_t i = ListFor(longest, chunk);
const AccessDensityPrediction type =
pt->HasDenseSpans()
? AccessDensityPrediction::kDense
: AccessDensityPrediction::kSparse;
size_t i = ListFor(longest, IndexFor(pt), type, pt->nallocs());

if (!pt->released()) {
regular_alloc_[type].Add(pt, i);
Expand Down
17 changes: 13 additions & 4 deletions tcmalloc/huge_page_filler_fuzz.cc
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ void FuzzFiller(const std::string& s) {
// We interpret data as a small DSL for exploring the state space of
// HugePageFiller.
//
// [0] - (available)
// [0] - used for choosing dense tracker type.
// [1] - (available)
// [2] - (available)
//
Expand All @@ -116,11 +116,15 @@ void FuzzFiller(const std::string& s) {
// For example, this input can provide a Length to
// allocate, or the index of the previous allocation to
// deallocate.
const HugePageFillerDenseTrackerType dense_tracker_type =
static_cast<uint8_t>(data[0]) >= 128
? HugePageFillerDenseTrackerType::kLongestFreeRangeAndChunks
: HugePageFillerDenseTrackerType::kSpansAllocated;
data += kInitBytes;
size -= kInitBytes;

HugePageFiller<PageTracker> filler(Clock{.now = mock_clock, .freq = freq},
unback, unback);
dense_tracker_type, unback, unback);

struct Alloc {
PageId page;
Expand All @@ -144,8 +148,8 @@ void FuzzFiller(const std::string& s) {
//
// value[0:15] - We choose a Length to allocate.
// value[16:31] - We select num_to_objects.
const Length n(std::clamp<size_t>(value & 0xFFFF, 1,
kPagesPerHugePage.raw_num() - 1));
Length n(std::clamp<size_t>(value & 0xFFFF, 1,
kPagesPerHugePage.raw_num() - 1));
AccessDensityPrediction density;
const uint32_t lval = (value >> 16);
// Choose many objects if the last bit is 1.
Expand All @@ -163,6 +167,11 @@ void FuzzFiller(const std::string& s) {
num_objects = 1;
density = AccessDensityPrediction::kSparse;
}
if (dense_tracker_type ==
HugePageFillerDenseTrackerType::kSpansAllocated &&
density == AccessDensityPrediction::kDense) {
n = Length(1);
}

SpanAllocInfo alloc_info = {.objects_per_span = num_objects,
.density = density};
Expand Down
Loading

0 comments on commit 37270f1

Please sign in to comment.