From a9f157b648ccedf302d66ea1c3938a6b9ba0b2d5 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Thu, 25 Apr 2024 14:30:16 +0300 Subject: [PATCH] database: apply_in_memory: unfreeze_gently large mutations Prevent stalls coming from applying large mutations in memory synchronously, like the ones seen with the test_add_many_nodes_under_load dtest: ``` | | | ++[5#2/2 44%] addr=0x1498efb total=256 count=3 avg=85: | | | | replica::memtable::apply(frozen_mutation const&, seastar::lw_shared_ptr const&, db::rp_handle&&)::$_0::operator()() const::{lambda()#1}::operator() at ./replica/memtable.cc:804 | | | | (inlined by) logalloc::allocating_section::with_reclaiming_disabled const&, db::rp_handle&&)::$_0::operator()() const::{lambda()#1}&> at ././utils/logalloc.hh:500 | | | | (inlined by) logalloc::allocating_section::operator() const&, db::rp_handle&&)::$_0::operator()() const::{lambda()#1}>(logalloc::region&, replica::memtable::apply(frozen_mutation const&, seastar::lw_shared_ptr const&, db::rp_handle&&)::$_0::operator()() const::{lambda()#1}&&)::{lambda()#1}::operator() at ././utils/logalloc.hh:527 | | | | (inlined by) logalloc::allocating_section::with_reserve const&, db::rp_handle&&)::$_0::operator()() const::{lambda()#1}>(logalloc::region&, replica::memtable::apply(frozen_mutation const&, seastar::lw_shared_ptr const&, db::rp_handle&&)::$_0::operator()() const::{lambda()#1}&&)::{lambda()#1}> at ././utils/logalloc.hh:471 | | | | (inlined by) logalloc::allocating_section::operator() const&, db::rp_handle&&)::$_0::operator()() const::{lambda()#1}> at ././utils/logalloc.hh:526 | | | | (inlined by) replica::memtable::apply(frozen_mutation const&, seastar::lw_shared_ptr const&, db::rp_handle&&)::$_0::operator() at ./replica/memtable.cc:800 | | | | (inlined by) with_allocator const&, db::rp_handle&&)::$_0> at ././utils/allocation_strategy.hh:318 | | | | (inlined by) replica::memtable::apply at ./replica/memtable.cc:799 | | | ++[6#1/1 100%] addr=0x145047b total=1731 count=21 
avg=82: | | | | replica::table::do_apply&> at ./replica/table.cc:2896 | | | ++[7#1/1 100%] addr=0x13ddccb total=2852 count=32 avg=89: | | | | replica::table::apply(frozen_mutation const&, seastar::lw_shared_ptr, db::rp_handle&&, std::chrono::time_point > >)::$_0::operator() at ./replica/table.cc:2924 | | | | (inlined by) seastar::futurize::invoke, db::rp_handle&&, std::chrono::time_point > >)::$_0&> at ././seastar/include/seastar/core/future.hh:2032 | | | | (inlined by) seastar::futurize_invoke, db::rp_handle&&, std::chrono::time_point > >)::$_0&> at ././seastar/include/seastar/core/future.hh:2066 | | | | (inlined by) replica::dirty_memory_manager_logalloc::region_group::run_when_memory_available, db::rp_handle&&, std::chrono::time_point > >)::$_0> at ./replica/dirty_memory_manager.hh:572 | | | | (inlined by) replica::table::apply at ./replica/table.cc:2923 | | | ++ - addr=0x1330ba1: | | | | replica::database::apply_in_memory at ./replica/database.cc:1812 | | | ++ - addr=0x1360054: | | | | replica::database::do_apply at ./replica/database.cc:2032 ``` This change has virtually no effect on small mutations (up to 128KB in size), which still take the existing path that applies the frozen mutation directly. build/release/scylla perf-simple-query --write --default-log-level=error --random-seed=1 -c 1 Before: median 80092.06 tps ( 59.3 allocs/op, 16.0 logallocs/op, 14.3 tasks/op, 53291 insns/op, 0 errors) After: median 78780.86 tps ( 59.3 allocs/op, 16.0 logallocs/op, 14.3 tasks/op, 53311 insns/op, 0 errors) To estimate the performance ramifications on large mutations, I measured perf-simple-query --write with unfreeze_gently called unconditionally, regardless of the mutation size: median 77411.26 tps ( 71.3 allocs/op, 8.0 logallocs/op, 14.3 tasks/op, 53280 insns/op, 0 errors) This shows the allocations moving out of logalloc (in memtable::apply of frozen_mutation) into seastar allocations (in unfreeze_gently), with <1% cpu overhead. 
Signed-off-by: Benny Halevy --- replica/database.cc | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/replica/database.cc b/replica/database.cc index a69214280fa7..ed614fd29a5c 100644 --- a/replica/database.cc +++ b/replica/database.cc @@ -39,6 +39,7 @@ #include #include #include "mutation/frozen_mutation.hh" +#include "mutation/async_utils.hh" #include #include "service/migration_listener.hh" #include "cell_locking.hh" @@ -1808,6 +1809,14 @@ future<> database::apply_in_memory(const frozen_mutation& m, schema_ptr m_schema data_listeners().on_write(m_schema, m); + if (m.representation().size() > 128*1024) { + return unfreeze_gently(m, std::move(m_schema)).then([&cf, h = std::move(h), timeout] (auto m) mutable { + return do_with(std::move(m), [&cf, h = std::move(h), timeout] (auto& m) mutable { + return cf.apply(m, std::move(h), timeout); + }); + }); + } + return cf.apply(m, std::move(m_schema), std::move(h), timeout); }