-
Notifications
You must be signed in to change notification settings - Fork 592
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
refactor: fix incorrect skip multi-version key metric and save unneccessary key comparison #14494
Changes from 7 commits
e6301f2
2caa829
edaae7e
e5efde9
daf97b4
18b6e47
f594991
991ac6c
01f5c0e
6111632
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -26,6 +26,42 @@ use crate::hummock::value::HummockValue; | |
use crate::hummock::HummockResult; | ||
use crate::monitor::StoreLocalStatistic; | ||
|
||
/// State transitions: | ||
/// Init -> Ready: The initial entry after seek/rewind can be exposed. | ||
/// Init -> Next: The initial entry after seek/rewind cannot be exposed. | ||
/// Init/Next -> Invalid: Iterator has been invalidated. | ||
/// Ready/Next -> Next: Continue to call next on the inner iterator. | ||
/// Next -> Ready: The current entry can be exposed. | ||
enum IteratorState { | ||
/// Initial state on iterator creation | ||
Init, | ||
|
||
/// Iterator is pointed to a valid key that can be exposed. | ||
Ready, | ||
|
||
/// Iterator is pointed to a key that should not be exposed. next() should be called. | ||
Next, | ||
|
||
/// Iterator is no longer valid. Reasons: | ||
/// - Iterator is pointed to a out-of-range key. | ||
/// - Iteraror meets EOF. | ||
/// - The internal iterator is not valid. | ||
/// - Error happens during iteration. | ||
Invalid, | ||
} | ||
|
||
impl IteratorState { | ||
#[inline] | ||
fn is_ready(&self) -> bool { | ||
matches!(self, Self::Ready) | ||
} | ||
|
||
#[inline] | ||
fn transition_to(&mut self, new_state: IteratorState) { | ||
*self = new_state; | ||
} | ||
} | ||
|
||
/// [`UserIterator`] can be used by user directly. | ||
pub struct UserIterator<I: HummockIterator<Direction = Forward>> { | ||
/// Inner table iterator. | ||
|
@@ -37,9 +73,6 @@ pub struct UserIterator<I: HummockIterator<Direction = Forward>> { | |
/// Last user value | ||
last_val: Bytes, | ||
|
||
/// Flag for whether the iterator reach over the right end of the range. | ||
out_of_range: bool, | ||
|
||
/// Start and end bounds of user key. | ||
key_range: UserKeyRange, | ||
|
||
|
@@ -55,6 +88,9 @@ pub struct UserIterator<I: HummockIterator<Direction = Forward>> { | |
stats: StoreLocalStatistic, | ||
|
||
delete_range_iter: ForwardMergeRangeIterator, | ||
|
||
/// Current state of the iterator | ||
state: IteratorState, | ||
} | ||
|
||
// TODO: decide whether this should also impl `HummockIterator` | ||
|
@@ -70,7 +106,6 @@ impl<I: HummockIterator<Direction = Forward>> UserIterator<I> { | |
) -> Self { | ||
Self { | ||
iterator, | ||
out_of_range: false, | ||
key_range, | ||
last_key: FullKey::default(), | ||
last_val: Bytes::new(), | ||
|
@@ -79,6 +114,7 @@ impl<I: HummockIterator<Direction = Forward>> UserIterator<I> { | |
stats: StoreLocalStatistic::default(), | ||
delete_range_iter, | ||
_version: version, | ||
state: IteratorState::Init, | ||
} | ||
} | ||
|
||
|
@@ -96,62 +132,34 @@ impl<I: HummockIterator<Direction = Forward>> UserIterator<I> { | |
} | ||
|
||
/// Gets the iterator move to the next step. | ||
/// See `IteratorState` for the state machine details. | ||
/// | ||
/// Returned result: | ||
/// - if `Ok(())` is returned, it means that the iterator successfully move to the next position | ||
/// (may reach to the end and thus not valid) | ||
/// - if `Err(_) ` is returned, it means that some error happened. | ||
pub async fn next(&mut self) -> HummockResult<()> { | ||
while self.iterator.is_valid() { | ||
let full_key = self.iterator.key(); | ||
let epoch = full_key.epoch_with_gap.pure_epoch(); | ||
|
||
// handle multi-version | ||
if epoch < self.min_epoch || epoch > self.read_epoch { | ||
self.iterator.next().await?; | ||
continue; | ||
} | ||
// Move the iterator to the next step if it is currently potined to a ready entry. | ||
if self.state.is_ready() { | ||
self.state.transition_to(IteratorState::Next); | ||
} | ||
|
||
if self.last_key.user_key.as_ref() != full_key.user_key { | ||
// It is better to early return here if the user key is already | ||
// out of range to avoid unnecessary access on the range tomestones | ||
// via `delete_range_iter`. | ||
// For example, if we are iterating with key range [0x0a, 0x0c) and the | ||
// current key is 0xff, we will access range tombstones in [0x0c, 0xff], | ||
// which is a waste of work. | ||
if self.key_out_of_range() { | ||
self.out_of_range = true; | ||
return Ok(()); | ||
// Loop until the iterator becomes ready or invalid. | ||
loop { | ||
match self.state { | ||
IteratorState::Init => { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The state
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good idea! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed. |
||
self.last_key = FullKey::default(); | ||
self.step_one().await?; | ||
} | ||
|
||
self.last_key = full_key.copy_into(); | ||
// handle delete operation | ||
match self.iterator.value() { | ||
HummockValue::Put(val) => { | ||
self.delete_range_iter.next_until(full_key.user_key).await?; | ||
if self.delete_range_iter.current_epoch() >= epoch { | ||
self.stats.skip_delete_key_count += 1; | ||
} else { | ||
self.last_val = Bytes::copy_from_slice(val); | ||
self.stats.processed_key_count += 1; | ||
return Ok(()); | ||
} | ||
} | ||
// It means that the key is deleted from the storage. | ||
// Deleted kv and the previous versions (if any) of the key should not be | ||
// returned to user. | ||
HummockValue::Delete => { | ||
self.stats.skip_delete_key_count += 1; | ||
} | ||
IteratorState::Next => { | ||
self.iterator.next().await?; | ||
self.step_one().await?; | ||
} | ||
IteratorState::Ready | IteratorState::Invalid => { | ||
return Ok(()); | ||
} | ||
} else { | ||
self.stats.skip_multi_version_key_count += 1; | ||
} | ||
|
||
self.iterator.next().await?; | ||
} | ||
|
||
Ok(()) // not valid, EOF | ||
} | ||
|
||
/// Returns the key with the newest version. Thus no version in it, and only the `user_key` will | ||
|
@@ -177,7 +185,7 @@ impl<I: HummockIterator<Direction = Forward>> UserIterator<I> { | |
/// Resets the iterating position to the beginning. | ||
pub async fn rewind(&mut self) -> HummockResult<()> { | ||
// Reset | ||
self.out_of_range = false; | ||
self.state.transition_to(IteratorState::Init); | ||
|
||
// Handle range scan | ||
match &self.key_range.0 { | ||
|
@@ -188,11 +196,12 @@ impl<I: HummockIterator<Direction = Forward>> UserIterator<I> { | |
}; | ||
self.iterator.seek(full_key.to_ref()).await?; | ||
if !self.iterator.is_valid() { | ||
self.state.transition_to(IteratorState::Invalid); | ||
return Ok(()); | ||
} | ||
|
||
if self.key_out_of_range() { | ||
self.out_of_range = true; | ||
self.state.transition_to(IteratorState::Invalid); | ||
return Ok(()); | ||
} | ||
|
||
|
@@ -202,23 +211,21 @@ impl<I: HummockIterator<Direction = Forward>> UserIterator<I> { | |
Unbounded => { | ||
self.iterator.rewind().await?; | ||
if !self.iterator.is_valid() { | ||
self.state.transition_to(IteratorState::Invalid); | ||
return Ok(()); | ||
} | ||
|
||
self.delete_range_iter.rewind().await?; | ||
} | ||
}; | ||
|
||
// Handle multi-version | ||
self.last_key = FullKey::default(); | ||
// Handles range scan when key > end_key | ||
self.next().await | ||
} | ||
|
||
/// Resets the iterating position to the first position where the key >= provided key. | ||
pub async fn seek(&mut self, user_key: UserKey<&[u8]>) -> HummockResult<()> { | ||
// Reset | ||
self.out_of_range = false; | ||
self.state.transition_to(IteratorState::Init); | ||
|
||
// Handle range scan when key < begin_key | ||
let user_key = match &self.key_range.0 { | ||
|
@@ -240,35 +247,91 @@ impl<I: HummockIterator<Direction = Forward>> UserIterator<I> { | |
}; | ||
self.iterator.seek(full_key).await?; | ||
if !self.iterator.is_valid() { | ||
self.state.transition_to(IteratorState::Invalid); | ||
return Ok(()); | ||
} | ||
|
||
if self.key_out_of_range() { | ||
self.out_of_range = true; | ||
self.state.transition_to(IteratorState::Invalid); | ||
return Ok(()); | ||
} | ||
|
||
self.delete_range_iter.seek(full_key.user_key).await?; | ||
|
||
// Handle multi-version | ||
self.last_key = FullKey::default(); | ||
// Handle range scan when key > end_key | ||
|
||
self.next().await | ||
} | ||
|
||
/// Indicates whether the iterator can be used. | ||
pub fn is_valid(&self) -> bool { | ||
// Handle range scan | ||
// key >= begin_key is guaranteed by seek/rewind function | ||
(!self.out_of_range) && self.iterator.is_valid() | ||
self.state.is_ready() | ||
} | ||
|
||
pub fn collect_local_statistic(&self, stats: &mut StoreLocalStatistic) { | ||
stats.add(&self.stats); | ||
self.iterator.collect_local_statistic(stats); | ||
} | ||
|
||
/// Check the inner iterator and try move onto the next state once. | ||
async fn step_one(&mut self) -> HummockResult<()> { | ||
if !self.iterator.is_valid() { | ||
self.state.transition_to(IteratorState::Invalid); | ||
return Ok(()); | ||
} | ||
|
||
let full_key = self.iterator.key(); | ||
let epoch = full_key.epoch_with_gap.pure_epoch(); | ||
|
||
// Handle epoch visibility | ||
if epoch < self.min_epoch || epoch > self.read_epoch { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. question: why need to take the next key when the epoch is already greater than read_epoch? read_epoch means that the Iterator should expose the key that epoch greater than it. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Because you need to move onto the next user key. In order to do that, you need to continue to call |
||
self.state.transition_to(IteratorState::Next); | ||
return Ok(()); | ||
} | ||
|
||
// It is better to early return here if the user key is already | ||
// out of range to avoid unnecessary access on the range tomestones | ||
// via `delete_range_iter`. | ||
// For example, if we are iterating with key range [0x0a, 0x0c) and the | ||
// current key is 0xff, we will access range tombstones in [0x0c, 0xff], | ||
// which is a waste of work. | ||
if self.key_out_of_range() { | ||
self.state.transition_to(IteratorState::Invalid); | ||
return Ok(()); | ||
} | ||
|
||
// Skip older version entry for the same user key | ||
if self.last_key.user_key.as_ref() == full_key.user_key { | ||
self.stats.skip_multi_version_key_count += 1; | ||
self.state.transition_to(IteratorState::Next); | ||
return Ok(()); | ||
} | ||
|
||
self.last_key = full_key.copy_into(); | ||
|
||
// Handle delete operation | ||
match self.iterator.value() { | ||
HummockValue::Put(val) => { | ||
self.delete_range_iter.next_until(full_key.user_key).await?; | ||
if self.delete_range_iter.current_epoch() >= epoch { | ||
self.stats.skip_delete_key_count += 1; | ||
} else { | ||
self.last_val = Bytes::copy_from_slice(val); | ||
self.stats.processed_key_count += 1; | ||
self.state.transition_to(IteratorState::Ready); | ||
return Ok(()); | ||
} | ||
} | ||
// It means that the key is deleted from the storage. | ||
// Deleted kv and the previous versions (if any) of the key should not be | ||
// returned to user. | ||
HummockValue::Delete => { | ||
self.stats.skip_delete_key_count += 1; | ||
} | ||
} | ||
|
||
self.state.transition_to(IteratorState::Next); | ||
Ok(()) | ||
} | ||
|
||
// Validate whether the current key is already out of range. | ||
fn key_out_of_range(&self) -> bool { | ||
assert!(self.iterator.is_valid()); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It seems that
Init
andNext
is intermediate state that is invisible outside the scope of method calls. Is it possible to declare the two states only in the scope of thenext
call?