-
Notifications
You must be signed in to change notification settings - Fork 590
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
refactor: fix incorrect skip multi-version key metric and save unneccessary key comparison #14494
Changes from 5 commits
e6301f2
2caa829
edaae7e
e5efde9
daf97b4
18b6e47
f594991
991ac6c
01f5c0e
6111632
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -26,6 +26,67 @@ use crate::hummock::value::HummockValue; | |
use crate::hummock::HummockResult; | ||
use crate::monitor::StoreLocalStatistic; | ||
|
||
/// State transitions: | ||
/// Init -> Ready: The initial entry after seek/rewind can be exposed. | ||
/// Init -> Next: The initial entry after seek/rewind cannot be exposed. | ||
/// Init/Next -> Invalid: Iterator has been invalidated. | ||
/// Ready/Next -> Next: Continue to call next on the inner iterator. | ||
/// Next -> Ready: The current entry can be exposed. | ||
enum IteratorState { | ||
/// Initial state on iterator creation | ||
Init, | ||
|
||
/// Iterator is pointed to a valid key that can be exposed. | ||
Ready, | ||
|
||
/// Iterator is pointed to a key that should not be exposed. next() should be called. | ||
Next, | ||
|
||
/// Iterator is no longer valid. Reasons: | ||
/// - Iterator is pointed to a out-of-range key. | ||
/// - Iteraror meets EOF. | ||
/// - The internal iterator is not valid. | ||
/// - Error happens during iteration. | ||
Invalid, | ||
} | ||
|
||
impl IteratorState { | ||
#[inline] | ||
fn is_next(&self) -> bool { | ||
matches!(self, Self::Next) | ||
} | ||
|
||
#[inline] | ||
fn is_ready(&self) -> bool { | ||
matches!(self, Self::Ready) | ||
} | ||
|
||
#[inline] | ||
fn is_init(&self) -> bool { | ||
matches!(self, Self::Init) | ||
} | ||
|
||
#[inline] | ||
fn next(&mut self) { | ||
*self = IteratorState::Next; | ||
} | ||
|
||
#[inline] | ||
fn ready(&mut self) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. When I read code, I always get lost in
Wouldn't it be easier to understand to explicitly use set_state(IteratorState) to change the state? feel free to ignore me. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is a good point. I am also having a hard time on deciding the naming. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is indeed better to put |
||
*self = IteratorState::Ready; | ||
} | ||
|
||
#[inline] | ||
fn reset(&mut self) { | ||
*self = IteratorState::Init; | ||
} | ||
|
||
#[inline] | ||
fn invalidate(&mut self) { | ||
*self = IteratorState::Invalid; | ||
} | ||
} | ||
|
||
/// [`UserIterator`] can be used by user directly. | ||
pub struct UserIterator<I: HummockIterator<Direction = Forward>> { | ||
/// Inner table iterator. | ||
|
@@ -37,9 +98,6 @@ pub struct UserIterator<I: HummockIterator<Direction = Forward>> { | |
/// Last user value | ||
last_val: Bytes, | ||
|
||
/// Flag for whether the iterator reach over the right end of the range. | ||
out_of_range: bool, | ||
|
||
/// Start and end bounds of user key. | ||
key_range: UserKeyRange, | ||
|
||
|
@@ -55,6 +113,9 @@ pub struct UserIterator<I: HummockIterator<Direction = Forward>> { | |
stats: StoreLocalStatistic, | ||
|
||
delete_range_iter: ForwardMergeRangeIterator, | ||
|
||
/// Current state of the iterator | ||
state: IteratorState, | ||
} | ||
|
||
// TODO: decide whether this should also impl `HummockIterator` | ||
|
@@ -70,7 +131,6 @@ impl<I: HummockIterator<Direction = Forward>> UserIterator<I> { | |
) -> Self { | ||
Self { | ||
iterator, | ||
out_of_range: false, | ||
key_range, | ||
last_key: FullKey::default(), | ||
last_val: Bytes::new(), | ||
|
@@ -79,6 +139,7 @@ impl<I: HummockIterator<Direction = Forward>> UserIterator<I> { | |
stats: StoreLocalStatistic::default(), | ||
delete_range_iter, | ||
_version: version, | ||
state: IteratorState::Init, | ||
} | ||
} | ||
|
||
|
@@ -96,62 +157,21 @@ impl<I: HummockIterator<Direction = Forward>> UserIterator<I> { | |
} | ||
|
||
/// Gets the iterator move to the next step. | ||
/// See `IteratorState` for the state machine details. | ||
/// | ||
/// Returned result: | ||
/// - if `Ok(())` is returned, it means that the iterator successfully move to the next position | ||
/// (may reach to the end and thus not valid) | ||
/// - if `Err(_) ` is returned, it means that some error happened. | ||
pub async fn next(&mut self) -> HummockResult<()> { | ||
while self.iterator.is_valid() { | ||
let full_key = self.iterator.key(); | ||
let epoch = full_key.epoch_with_gap.pure_epoch(); | ||
|
||
// handle multi-version | ||
if epoch < self.min_epoch || epoch > self.read_epoch { | ||
self.iterator.next().await?; | ||
continue; | ||
// Iterator must be initialized via seek/rewind before calling next is called | ||
assert!(!self.state.is_init()); | ||
loop { | ||
self.step_one().await?; | ||
if !self.state.is_next() { | ||
return Ok(()); | ||
} | ||
|
||
if self.last_key.user_key.as_ref() != full_key.user_key { | ||
// It is better to early return here if the user key is already | ||
// out of range to avoid unnecessary access on the range tomestones | ||
// via `delete_range_iter`. | ||
// For example, if we are iterating with key range [0x0a, 0x0c) and the | ||
// current key is 0xff, we will access range tombstones in [0x0c, 0xff], | ||
// which is a waste of work. | ||
if self.key_out_of_range() { | ||
self.out_of_range = true; | ||
return Ok(()); | ||
} | ||
|
||
self.last_key = full_key.copy_into(); | ||
// handle delete operation | ||
match self.iterator.value() { | ||
HummockValue::Put(val) => { | ||
self.delete_range_iter.next_until(full_key.user_key).await?; | ||
if self.delete_range_iter.current_epoch() >= epoch { | ||
self.stats.skip_delete_key_count += 1; | ||
} else { | ||
self.last_val = Bytes::copy_from_slice(val); | ||
self.stats.processed_key_count += 1; | ||
return Ok(()); | ||
} | ||
} | ||
// It means that the key is deleted from the storage. | ||
// Deleted kv and the previous versions (if any) of the key should not be | ||
// returned to user. | ||
HummockValue::Delete => { | ||
self.stats.skip_delete_key_count += 1; | ||
} | ||
} | ||
} else { | ||
self.stats.skip_multi_version_key_count += 1; | ||
} | ||
|
||
self.iterator.next().await?; | ||
} | ||
|
||
Ok(()) // not valid, EOF | ||
} | ||
|
||
/// Returns the key with the newest version. Thus no version in it, and only the `user_key` will | ||
|
@@ -176,8 +196,7 @@ impl<I: HummockIterator<Direction = Forward>> UserIterator<I> { | |
|
||
/// Resets the iterating position to the beginning. | ||
pub async fn rewind(&mut self) -> HummockResult<()> { | ||
// Reset | ||
self.out_of_range = false; | ||
self.state.reset(); | ||
|
||
// Handle range scan | ||
match &self.key_range.0 { | ||
|
@@ -188,11 +207,12 @@ impl<I: HummockIterator<Direction = Forward>> UserIterator<I> { | |
}; | ||
self.iterator.seek(full_key.to_ref()).await?; | ||
if !self.iterator.is_valid() { | ||
self.state.invalidate(); | ||
return Ok(()); | ||
} | ||
|
||
if self.key_out_of_range() { | ||
self.out_of_range = true; | ||
self.state.invalidate(); | ||
return Ok(()); | ||
} | ||
|
||
|
@@ -202,23 +222,25 @@ impl<I: HummockIterator<Direction = Forward>> UserIterator<I> { | |
Unbounded => { | ||
self.iterator.rewind().await?; | ||
if !self.iterator.is_valid() { | ||
self.state.invalidate(); | ||
return Ok(()); | ||
} | ||
|
||
self.delete_range_iter.rewind().await?; | ||
} | ||
}; | ||
|
||
// Handle multi-version | ||
self.last_key = FullKey::default(); | ||
// Handles range scan when key > end_key | ||
self.next().await | ||
self.step_one().await?; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we directly use self.next() (which includes step_one loop)? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good point! |
||
if self.state.is_next() { | ||
self.next().await?; | ||
} | ||
Ok(()) | ||
} | ||
|
||
/// Resets the iterating position to the first position where the key >= provided key. | ||
pub async fn seek(&mut self, user_key: UserKey<&[u8]>) -> HummockResult<()> { | ||
// Reset | ||
self.out_of_range = false; | ||
self.state.reset(); | ||
|
||
// Handle range scan when key < begin_key | ||
let user_key = match &self.key_range.0 { | ||
|
@@ -240,35 +262,119 @@ impl<I: HummockIterator<Direction = Forward>> UserIterator<I> { | |
}; | ||
self.iterator.seek(full_key).await?; | ||
if !self.iterator.is_valid() { | ||
self.state.invalidate(); | ||
return Ok(()); | ||
} | ||
|
||
if self.key_out_of_range() { | ||
self.out_of_range = true; | ||
self.state.invalidate(); | ||
return Ok(()); | ||
} | ||
|
||
self.delete_range_iter.seek(full_key.user_key).await?; | ||
|
||
// Handle multi-version | ||
self.last_key = FullKey::default(); | ||
// Handle range scan when key > end_key | ||
|
||
self.next().await | ||
self.step_one().await?; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ditto |
||
if self.state.is_next() { | ||
self.next().await?; | ||
} | ||
Ok(()) | ||
} | ||
|
||
/// Indicates whether the iterator can be used. | ||
pub fn is_valid(&self) -> bool { | ||
// Handle range scan | ||
// key >= begin_key is guaranteed by seek/rewind function | ||
(!self.out_of_range) && self.iterator.is_valid() | ||
self.state.is_ready() | ||
} | ||
|
||
pub fn collect_local_statistic(&self, stats: &mut StoreLocalStatistic) { | ||
stats.add(&self.stats); | ||
self.iterator.collect_local_statistic(stats); | ||
} | ||
|
||
/// Transition iterator state. | ||
/// See `IteratorState` for the state machine details. | ||
async fn step_one(&mut self) -> HummockResult<()> { | ||
match self.state { | ||
IteratorState::Init => { | ||
self.last_key = FullKey::default(); | ||
self.on_next().await?; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It looks correct to me. However, I did not find the pattern of calling There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I have refactored the logic:
|
||
Ok(()) | ||
} | ||
IteratorState::Ready => { | ||
self.state.next(); | ||
Ok(()) | ||
} | ||
IteratorState::Next => { | ||
self.iterator.next().await?; | ||
self.on_next().await?; | ||
Ok(()) | ||
} | ||
IteratorState::Invalid => { | ||
// End state | ||
Ok(()) | ||
} | ||
} | ||
} | ||
|
||
async fn on_next(&mut self) -> HummockResult<()> { | ||
if !self.iterator.is_valid() { | ||
self.state.invalidate(); | ||
return Ok(()); | ||
} | ||
|
||
let full_key = self.iterator.key(); | ||
let epoch = full_key.epoch_with_gap.pure_epoch(); | ||
|
||
// Handle epoch visibility | ||
if epoch < self.min_epoch || epoch > self.read_epoch { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. question: why need to take the next key when the epoch is already greater than read_epoch? read_epoch means that the Iterator should expose the key that epoch greater than it. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Because you need to move onto the next user key. In order to do that, you need to continue to call |
||
self.state.next(); | ||
return Ok(()); | ||
} | ||
|
||
// It is better to early return here if the user key is already | ||
// out of range to avoid unnecessary access on the range tomestones | ||
// via `delete_range_iter`. | ||
// For example, if we are iterating with key range [0x0a, 0x0c) and the | ||
// current key is 0xff, we will access range tombstones in [0x0c, 0xff], | ||
// which is a waste of work. | ||
if self.key_out_of_range() { | ||
self.state.invalidate(); | ||
return Ok(()); | ||
} | ||
|
||
// Skip older version entry for the same user key | ||
if self.last_key.user_key.as_ref() == full_key.user_key { | ||
self.stats.skip_multi_version_key_count += 1; | ||
self.state.next(); | ||
return Ok(()); | ||
} | ||
|
||
self.last_key = full_key.copy_into(); | ||
|
||
// Handle delete operation | ||
match self.iterator.value() { | ||
HummockValue::Put(val) => { | ||
self.delete_range_iter.next_until(full_key.user_key).await?; | ||
if self.delete_range_iter.current_epoch() >= epoch { | ||
self.stats.skip_delete_key_count += 1; | ||
} else { | ||
self.last_val = Bytes::copy_from_slice(val); | ||
self.stats.processed_key_count += 1; | ||
self.state.ready(); | ||
return Ok(()); | ||
} | ||
} | ||
// It means that the key is deleted from the storage. | ||
// Deleted kv and the previous versions (if any) of the key should not be | ||
// returned to user. | ||
HummockValue::Delete => { | ||
self.stats.skip_delete_key_count += 1; | ||
} | ||
} | ||
|
||
self.state.next(); | ||
Ok(()) | ||
} | ||
|
||
// Validate whether the current key is already out of range. | ||
fn key_out_of_range(&self) -> bool { | ||
assert!(self.iterator.is_valid()); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It seems that
Init
andNext
is intermediate state that is invisible outside the scope of method calls. Is it possible to declare the two states only in the scope of thenext
call?