Skip to content

Commit

Permalink
refactor: block
Browse files Browse the repository at this point in the history
Add key overlap
  • Loading branch information
Yongxin-Hu committed Mar 18, 2024
1 parent b704160 commit a34b9ba
Show file tree
Hide file tree
Showing 4 changed files with 90 additions and 49 deletions.
58 changes: 46 additions & 12 deletions src/engines/lsm/block/builder.rs
Original file line number Diff line number Diff line change
@@ -1,48 +1,82 @@
use bytes::BufMut;
use structopt::lazy_static::lazy_static;
use crate::engines::lsm::block::Block;
use crate::engines::lsm::key::{KeySlice, KeyVec};

const SIZEOF_U16: usize = std::mem::size_of::<u16>();

/// Accumulates encoded kv-pairs for a single block.
pub struct BlockBuilder {
/// Offset of each kv-pair
offsets: Vec<u16>,
/// [key_len(2byte), key , value_len(2byte), value]
/// [key_overlap_len(u16), rest_key_len(u16), key(rest_key_len), value_len(u16), value]
/// Because the keys inside a block are stored in sorted order,
/// key_overlap_len records how many leading bytes a key shares with the block's first_key, so the shared prefix is not stored again
data: Vec<u8>,
/// Block size (bytes)
block_size: usize
block_size: usize,
/// First key
first_key: Vec<u8>
}

impl BlockBuilder {
/// Creates a BlockBuilder with empty offsets, data and first key
/// # Arguments
/// * `block_size`: block size (bytes)
pub fn new(block_size: usize) -> Self {
BlockBuilder{
offsets: Vec::new(),
data: Vec::new(),
block_size
block_size,
first_key: Vec::new()
}
}

/// Counts the leading bytes that `key` shares with `first_key`.
///
/// # Arguments
/// * `first_key`: first key of the block
/// * `key`: key to compare against `first_key`
///
/// # Returns
/// Length in bytes of the common prefix. It never exceeds the length of the
/// shorter slice and is 0 when either slice is empty.
fn calc_overlap(first_key: &[u8], key: &[u8]) -> usize {
    // `zip` stops at the end of the shorter slice, so no manual bounds checks
    // are needed; `take_while` stops at the first differing byte.
    first_key
        .iter()
        .zip(key)
        .take_while(|(a, b)| a == b)
        .count()
}

/// 将 kv-pair 添加到块中。当 Block 已满时返回 false
/// # 参数
/// * key key
/// * value value
/// * key: key
/// * value: value
/// # 返回值
/// true 添加成功
/// false 添加失败
/// true: 添加成功 false: 添加失败
#[must_use]
pub fn add(&mut self, key: &[u8], value: &[u8]) -> bool {
assert!(!key.is_empty(), "key must not be empty");

if self.check_size(key, value) || self.is_empty() /* 允许放入的第一个 kv-pair 超过block_size */{
if self.check_size(key, value) || self.is_empty() /* 允许放入的第一个 kv-pair 超过 block_size */{
self.offsets.push(self.data.len() as u16);
// key_len
self.data.put_u16(key.len() as u16);
// key
self.data.put(key);
let key_overlap_len = Self::calc_overlap(self.first_key.as_slice(), key);
// key_overlap_len
self.data.put_u16(key_overlap_len as u16);
// rest_key_len
self.data.put_u16((key.len() - key_overlap_len) as u16);
// rest_key
self.data.put(&key[key_overlap_len..]);
// value_len
self.data.put_u16(value.len() as u16);
// value
self.data.put(value);
if self.first_key.is_empty() {
self.first_key = key.to_vec();
}
return true;
}
false
Expand Down
70 changes: 38 additions & 32 deletions src/engines/lsm/block/iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ pub struct BlockIterator {
block: Arc<Block>,
/// 当前 key ,空表示迭代器无效
key: Bytes,
/// the value range from the block
/// 当前 value block 中的范围
value_range: (usize, usize),
/// 当前 kv-pair 的索引, 范围在[0, num_of_element)
idx: usize,
Expand All @@ -17,6 +17,7 @@ pub struct BlockIterator {
impl Block {
fn get_first_key(&self) -> &[u8] {
let mut buf = &self.data[..];
buf.get_u16();
let key_len = buf.get_u16();
let key = &buf[..key_len as usize];
key
Expand All @@ -37,27 +38,27 @@ impl BlockIterator{
}
}

// Create a BlockIterator and seek to the first kv-pair
pub fn create_and_seek_to_first(block: Arc<Block>) -> Self {
// Create a BlockIterator and move it to the first kv-pair
pub fn create_and_move_to_first(block: Arc<Block>) -> Self {
let mut iter = BlockIterator::new(block);
iter.seek_to_first();
iter.move_to_first();
iter
}

/// Create a BlockIterator and seek to the first key >= `key`
pub fn create_and_seek_to_key(block: Arc<Block>, key: &[u8]) -> Self {
/// Create a BlockIterator and move it to the first key >= `key`
pub fn create_and_move_to_key(block: Arc<Block>, key: &[u8]) -> Self {
let mut iter = BlockIterator::new(block);
iter.seek_to_key(key);
iter.move_to_key(key);
iter
}

/// Returns the key at the iterator's current position.
///
/// # Panics
/// Panics if the current key is empty, i.e. the iterator is invalid.
pub fn key(&self) -> &[u8] {
    let current: &[u8] = self.key.as_ref();
    assert!(!current.is_empty(), "invalid iterator, key must not empty");
    current
}

// 返回当前的 value
/// 返回当前的 value
pub fn value(&self) -> &[u8] {
assert!(!self.key.is_empty(), "invalid iterator, key must not empty");
&self.block.data[self.value_range.0..self.value_range.1]
Expand All @@ -68,36 +69,36 @@ impl BlockIterator{
!self.key.is_empty()
}

// Seek to the first kv-pair
pub fn seek_to_first(&mut self) {
self.seek_to(0);
/// Move to the first kv-pair (index 0)
pub fn move_to_first(&mut self) {
self.move_to(0);
}

// Move to the next kv-pair
/// Move to the next kv-pair (current index + 1)
pub fn next(&mut self) {
let next_index = self.idx + 1;
self.seek_to(next_index);
self.move_to(next_index);
}

// Seek to the first key >= `key`
pub fn seek_to_key(&mut self, key: &[u8]) {
/// Move to the first key >= `key`
pub fn move_to_key(&mut self, key: &[u8]) {
// Binary search over the entry indices in the half-open range [low, high)
let mut low = 0;
let mut high = self.block.offsets.len();
while low < high {
let mid = low + (high - low) / 2;
// Position on the probe entry so its key can be compared
self.seek_to(mid);
self.move_to(mid);
assert!(self.is_valid());
match self.key().cmp(&key) {
std::cmp::Ordering::Less => low = mid + 1,
std::cmp::Ordering::Greater => high = mid,
// Exact match: the iterator is already positioned on it
std::cmp::Ordering::Equal => return,
}
}
// No exact match: position on `low`, where the search range closed
self.seek_to(low);
self.move_to(low);
}

// 定位到第 index 个 kv-pair
fn seek_to(&mut self, index: usize){
/// 移动到第 index 个 kv-pair
fn move_to(&mut self, index: usize){

if index >= self.block.offsets.len() {
self.key.clear();
Expand All @@ -107,17 +108,22 @@ impl BlockIterator{

let offset = self.block.offsets[index] as usize;
let mut data = &self.block.data[offset..];
// Key
let key_len = data.get_u16() as usize;
let key = &data[..key_len];

// rest key
let key_overlap_len = data.get_u16() as usize;
let rest_key_len = data.get_u16() as usize;
let rest_key = &data[..rest_key_len];
data.advance(rest_key_len);
// value
data.advance(key_len);
let value_len = data.get_u16() as usize;
let value_start = offset + SIZEOF_U16/* key_len */ + key_len /* key */ + SIZEOF_U16/* value_len */;
let value_start = offset + 2 * SIZEOF_U16/* key_overlap_len+rest_key_len */
+ rest_key_len /* rest_key */ + SIZEOF_U16/* value_len */;
let value_end = value_start + value_len;

self.idx = index;
self.key = Bytes::copy_from_slice(key);
let mut key = self.first_key.as_ref()[..key_overlap_len].to_vec();
key.extend_from_slice(rest_key);
self.key = Bytes::from(key);
self.value_range = (value_start, value_end);
}
}
Expand Down Expand Up @@ -169,7 +175,7 @@ mod test{
#[test]
fn test_block_iterator() {
let block = Arc::new(generate_block());
let mut iter = BlockIterator::create_and_seek_to_first(block);
let mut iter = BlockIterator::create_and_move_to_first(block);

for _ in 0..5 {
for i in 0..100 {
Expand All @@ -191,7 +197,7 @@ mod test{
);
iter.next();
}
iter.seek_to_first();
iter.move_to_first();
}
}

Expand All @@ -203,7 +209,7 @@ mod test{
let block = block_builder.build();
let encoded = block.encode();
let decoded_block = Block::decode(&encoded);
let mut block_iterator = BlockIterator::create_and_seek_to_first(Arc::new(decoded_block));
let mut block_iterator = BlockIterator::create_and_move_to_first(Arc::new(decoded_block));
assert_eq!(block_iterator.key(), b"key1");
block_iterator.next();
assert_eq!(block_iterator.key(), b"key2");
Expand All @@ -213,7 +219,7 @@ mod test{
#[test]
fn test_block_seek_key() {
let block = Arc::new(generate_block());
let mut iter = BlockIterator::create_and_seek_to_key(block, key_of(0).as_slice());
let mut iter = BlockIterator::create_and_move_to_key(block, key_of(0).as_slice());
for offset in 1..=5 {
for i in 0..num_of_keys() {
let key = iter.key();
Expand All @@ -232,9 +238,9 @@ mod test{
as_bytes(&value_of(i)),
as_bytes(value)
);
iter.seek_to_key(&format!("key_{:03}", i * 5 + offset).into_bytes());
iter.move_to_key(&format!("key_{:03}", i * 5 + offset).into_bytes());
}
iter.seek_to_key(b"k");
iter.move_to_key(b"k");
}
}
}
10 changes: 5 additions & 5 deletions src/engines/lsm/table/iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ impl SsTableIterator {
let first_block = table.read_block(0)?;
Ok(SsTableIterator{
table: Arc::clone(&table),
block_iterator: BlockIterator::create_and_seek_to_first(first_block),
block_iterator: BlockIterator::create_and_move_to_first(first_block),
block_index: 0
})
}
Expand All @@ -26,19 +26,19 @@ impl SsTableIterator {
/// Reads block 0 of the table, resets `block_index` to 0 and replaces the
/// block iterator with one positioned on that block's first kv-pair.
/// Propagates the error from `read_block` if the block cannot be read.
pub fn seek_to_first(&mut self) -> Result<()> {
let first_block = self.table.read_block(0)?;
self.block_index = 0;
self.block_iterator = BlockIterator::create_and_seek_to_first(first_block);
self.block_iterator = BlockIterator::create_and_move_to_first(first_block);
Ok(())
}

fn seek_to_key_inner(table: &Arc<SsTable>, key: Bytes) -> Result<(usize, BlockIterator)> {
let mut block_idx = table.find_block_idx(key.clone());
let mut block = table.read_block(block_idx)?;
let mut block_iterator = BlockIterator::create_and_seek_to_key(block, key.as_ref());
let mut block_iterator = BlockIterator::create_and_move_to_key(block, key.as_ref());
if !block_iterator.is_valid() {
block_idx += 1;
if block_idx < table.num_of_blocks() {
block = table.read_block(block_idx)?;
block_iterator = BlockIterator::create_and_seek_to_key(block, key.as_ref());
block_iterator = BlockIterator::create_and_move_to_key(block, key.as_ref());
}
}
Ok((block_idx, block_iterator))
Expand Down Expand Up @@ -82,7 +82,7 @@ impl StorageIterator for SsTableIterator {
self.block_index += 1;
if self.block_index < self.table.num_of_blocks() {
let block = self.table.read_block(self.block_index)?;
self.block_iterator = BlockIterator::create_and_seek_to_first(block);
self.block_iterator = BlockIterator::create_and_move_to_first(block);
}
}
Ok(())
Expand Down
1 change: 1 addition & 0 deletions src/engines/lsm/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use crate::engines::lsm::storage::LsmStorageInner;
use crate::engines::lsm::storage::state::LsmStorageState;
use crate::engines::lsm::table::iterator::SsTableIterator;

#[inline]
pub(crate) fn map_bound(bound: Bound<&[u8]>) -> Bound<Bytes> {
match bound {
Bound::Included(x) => Bound::Included(Bytes::copy_from_slice(x)),
Expand Down

0 comments on commit a34b9ba

Please sign in to comment.