From 96bc78905d1f9d7da70402355b42d71d84cede09 Mon Sep 17 00:00:00 2001 From: Kamil Braun Date: Fri, 23 Jun 2023 17:02:09 +0200 Subject: [PATCH] readers: evictable_reader: don't accidentally consume the entire partition The evictable reader must ensure that each buffer fill makes forward progress, i.e. the last fragment in the buffer has a position larger than the last fragment from the previous buffer-fill. Otherwise, the reader could get stuck in an infinite loop between buffer fills, if the reader is evicted in-between. The code guranteeing this forward progress had a bug: the comparison between the position after the last buffer-fill and the current last fragment position was done in the wrong direction. So if the condition that we wanted to achieve was already true, we would continue filling the buffer until partition end which may lead to OOMs such as in #13491. There was already a fix in this area to handle `partition_start` fragments correctly - #13563 - but it missed that the position comparison was done in the wrong order. Fix the comparison and adjust one of the tests (added in #13563) to detect this case. Fixes #13491 --- readers/multishard.cc | 2 +- test/boost/mutation_reader_test.cc | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/readers/multishard.cc b/readers/multishard.cc index 4c65b71e014a..3abfecc6318b 100644 --- a/readers/multishard.cc +++ b/readers/multishard.cc @@ -607,7 +607,7 @@ future<> evictable_reader_v2::fill_buffer() { // First make sure we've made progress w.r.t. _next_position_in_partition. // This loop becomes inifinite when next pos is a partition start. // In that case progress is guranteed anyway, so skip this loop entirely. - while (!_next_position_in_partition.is_partition_start() && next_mf && _tri_cmp(_next_position_in_partition, buffer().back().position()) <= 0) { + while (!_next_position_in_partition.is_partition_start() && next_mf && _tri_cmp(buffer().back().position(), _next_position_in_partition) <= 0) { push_mutation_fragment(_reader->pop_mutation_fragment()); next_mf = co_await _reader->peek(); } diff --git a/test/boost/mutation_reader_test.cc b/test/boost/mutation_reader_test.cc index 1773392d814d..be9b7cb736b8 100644 --- a/test/boost/mutation_reader_test.cc +++ b/test/boost/mutation_reader_test.cc @@ -3648,9 +3648,13 @@ SEASTAR_THREAD_TEST_CASE(test_evictable_reader_next_pos_is_partition_start) { auto stop_rd = deferred_close(rd); rd.set_max_buffer_size(max_buf_size); + // #13491 - the reader must not consume the entire partition but a small batch of fragments based on the buffer size. + rd.fill_buffer().get(); rd.fill_buffer().get(); auto buf1 = rd.detach_buffer(); - BOOST_REQUIRE_EQUAL(buf1.size(), 3); + // There should be 6-7 fragments, but to avoid computing the exact number of fragments that should fit in `max_buf_size`, + // just ensure that there are <= 10 (consuming the whole partition would give ~1000 fragments). + BOOST_REQUIRE_LE(buf1.size(), 10); } struct mutation_bounds {