Skip to content

Commit

Permalink
feat(storage): introduce new field sst_size for SstableInfo (#18005)
Browse files Browse the repository at this point in the history
  • Loading branch information
Li0k authored Aug 26, 2024
1 parent b0e50b4 commit 4e3b9ff
Show file tree
Hide file tree
Showing 26 changed files with 143 additions and 57 deletions.
5 changes: 5 additions & 0 deletions proto/hummock.proto
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ message SstableInfo {
uint64 object_id = 1;
uint64 sst_id = 2;
KeyRange key_range = 3;
// represents the physical object size, which is usually used in the builder.
uint64 file_size = 4;
repeated uint32 table_ids = 5;
uint64 meta_offset = 6;
Expand All @@ -28,6 +29,10 @@ message SstableInfo {
uint64 uncompressed_file_size = 11;
uint64 range_tombstone_count = 12;
BloomFilterType bloom_filter_kind = 13;

// To let the compaction strategy calculate sizes at a finer granularity, `sst_size` must be re-calculated after an SST is split.
// `sst_size` represents the size of the SST itself rather than the size of the underlying object; it is usually used in the meta.
uint64 sst_size = 14;
}

enum LevelType {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ struct RwHummockSstable {
range_tombstone_count: i64,
bloom_filter_kind: i32,
table_ids: JsonbVal,
sst_size: i64,
}

#[system_catalog(table, "rw_catalog.rw_hummock_current_version")]
Expand Down Expand Up @@ -134,6 +135,7 @@ fn version_to_sstable_rows(version: HummockVersion) -> Vec<RwHummockSstable> {
range_tombstone_count: sst.range_tombstone_count as _,
bloom_filter_kind: sst.bloom_filter_kind as _,
table_ids: json!(sst.table_ids).into(),
sst_size: sst.sst_size as _,
});
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ impl LevelCompactionPicker {
break;
}

target_level_size += sst.file_size;
target_level_size += sst.sst_size;
}

if pending_compact {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ impl CompactionTaskValidationRule for IntraCompactionTaskValidationRule {
let level_select_size = select_level
.table_infos
.iter()
.map(|sst| sst.file_size)
.map(|sst| sst.sst_size)
.sum::<u64>();

max_level_size = std::cmp::max(max_level_size, level_select_size);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ impl IntraCompactionPicker {
for level_select_table in &input.sstable_infos {
let level_select_size = level_select_table
.iter()
.map(|sst| sst.file_size)
.map(|sst| sst.sst_size)
.sum::<u64>();

max_level_size = std::cmp::max(max_level_size, level_select_size);
Expand Down Expand Up @@ -291,7 +291,7 @@ impl IntraCompactionPicker {
.check_multiple_overlap(&l0.sub_levels[idx].table_infos)
.is_empty());

let select_input_size = select_sst.file_size;
let select_input_size = select_sst.sst_size;
let input_levels = vec![
InputLevel {
level_idx: 0,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -301,8 +301,8 @@ impl CompactionPicker for ManualCompactionPicker {
}

Some(CompactionInput {
select_input_size: select_input_ssts.iter().map(|sst| sst.file_size).sum(),
target_input_size: target_input_ssts.iter().map(|sst| sst.file_size).sum(),
select_input_size: select_input_ssts.iter().map(|sst| sst.sst_size).sum(),
target_input_size: target_input_ssts.iter().map(|sst| sst.sst_size).sum(),
total_file_count: (select_input_ssts.len() + target_input_ssts.len()) as u64,
input_levels: vec![
InputLevel {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ impl MinOverlappingPicker {
let mut target_level_overlap_range = select_file_ranges[left].1.clone();
let mut total_file_size = 0;
for other in &target_tables[target_level_overlap_range.clone()] {
total_file_size += other.file_size;
total_file_size += other.sst_size;
}
let start_idx = select_file_ranges[left].0;
let mut end_idx = start_idx + 1;
Expand All @@ -99,10 +99,10 @@ impl MinOverlappingPicker {
{
break;
}
select_file_size += select_tables[*idx].file_size;
select_file_size += select_tables[*idx].sst_size;
if range.end > target_level_overlap_range.end {
for other in &target_tables[target_level_overlap_range.end..range.end] {
total_file_size += other.file_size;
total_file_size += other.sst_size;
}
target_level_overlap_range.end = range.end;
}
Expand Down Expand Up @@ -149,8 +149,8 @@ impl CompactionPicker for MinOverlappingPicker {
return None;
}
Some(CompactionInput {
select_input_size: select_input_ssts.iter().map(|sst| sst.file_size).sum(),
target_input_size: target_input_ssts.iter().map(|sst| sst.file_size).sum(),
select_input_size: select_input_ssts.iter().map(|sst| sst.sst_size).sum(),
target_input_size: target_input_ssts.iter().map(|sst| sst.sst_size).sum(),
total_file_count: (select_input_ssts.len() + target_input_ssts.len()) as u64,
input_levels: vec![
InputLevel {
Expand Down Expand Up @@ -310,7 +310,7 @@ impl NonOverlapSubLevelPicker {
}
basic_overlap_info.update(other);

add_files_size += other.file_size;
add_files_size += other.sst_size;
add_files_count += 1;
}

Expand Down Expand Up @@ -339,7 +339,7 @@ impl NonOverlapSubLevelPicker {
ret.total_file_count += ret.sstable_infos[reverse_index].len();
ret.total_file_size += ret.sstable_infos[reverse_index]
.iter()
.map(|sst| sst.file_size)
.map(|sst| sst.sst_size)
.sum::<u64>();
}

Expand All @@ -349,7 +349,7 @@ impl NonOverlapSubLevelPicker {
});
} else {
ret.total_file_count = 1;
ret.total_file_size = sst.file_size;
ret.total_file_size = sst.sst_size;
ret.sstable_infos[0].extend(vec![sst.clone()]);
}

Expand All @@ -370,7 +370,7 @@ impl NonOverlapSubLevelPicker {
let mut total_level_count = 0;
for (index, sstables) in ret.sstable_infos.iter().enumerate() {
total_file_count += sstables.len();
total_file_size += sstables.iter().map(|sst| sst.file_size).sum::<u64>();
total_file_size += sstables.iter().map(|sst| sst.sst_size).sum::<u64>();
total_level_count += 1;

// At least `min_expected_level_count` levels should be selected
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ impl SpaceReclaimCompactionPicker {
}
if !select_input_ssts.is_empty() {
return Some(CompactionInput {
select_input_size: select_input_ssts.iter().map(|sst| sst.file_size).sum(),
select_input_size: select_input_ssts.iter().map(|sst| sst.sst_size).sum(),
total_file_count: select_input_ssts.len() as u64,
input_levels: vec![
InputLevel {
Expand Down Expand Up @@ -140,7 +140,7 @@ impl SpaceReclaimCompactionPicker {
// turn to next_round
if !select_input_ssts.is_empty() {
return Some(CompactionInput {
select_input_size: select_input_ssts.iter().map(|sst| sst.file_size).sum(),
select_input_size: select_input_ssts.iter().map(|sst| sst.sst_size).sum(),
total_file_count: select_input_ssts.len() as u64,
input_levels: vec![
InputLevel {
Expand Down Expand Up @@ -311,7 +311,7 @@ mod test {
let select_file_size: u64 = task.input.input_levels[0]
.table_infos
.iter()
.map(|sst| sst.file_size)
.map(|sst| sst.sst_size)
.sum();
assert!(select_file_size > max_space_reclaim_bytes);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,11 +103,11 @@ impl TombstoneReclaimCompactionPicker {
}
};
return Some(CompactionInput {
select_input_size: select_input_ssts.iter().map(|sst| sst.file_size).sum(),
select_input_size: select_input_ssts.iter().map(|sst| sst.sst_size).sum(),
target_input_size: target_level
.table_infos
.iter()
.map(|sst| sst.file_size)
.map(|sst| sst.sst_size)
.sum(),
total_file_count: (select_input_ssts.len() + target_level.table_infos.len())
as u64,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ impl TrivialMovePicker {
self.pick_trivial_move_sst(select_tables, target_tables, level_handlers, stats)
{
return Some(CompactionInput {
select_input_size: trivial_move_sst.file_size,
select_input_size: trivial_move_sst.sst_size,
total_file_count: 1,
input_levels: vec![
InputLevel {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ impl TtlReclaimCompactionPicker {
});

Some(CompactionInput {
select_input_size: select_input_ssts.iter().map(|sst| sst.file_size).sum(),
select_input_size: select_input_ssts.iter().map(|sst| sst.sst_size).sum(),
total_file_count: select_input_ssts.len() as _,
input_levels: vec![
InputLevel {
Expand Down
6 changes: 3 additions & 3 deletions src/meta/src/hummock/compaction/selector/level_selector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,7 @@ impl DynamicLevelSelectorCore {
})
.map(|level| level.total_file_size)
.sum::<u64>()
- handlers[0].get_pending_output_file_size(ctx.base_level as u32);
.saturating_sub(handlers[0].get_pending_output_file_size(ctx.base_level as u32));
let base_level_size = levels.get_level(ctx.base_level).total_file_size;
let base_level_sst_count = levels.get_level(ctx.base_level).table_infos.len() as u64;

Expand Down Expand Up @@ -528,7 +528,7 @@ pub mod tests {
levels.levels[3].total_file_size = levels.levels[3]
.table_infos
.iter()
.map(|sst| sst.file_size)
.map(|sst| sst.sst_size)
.sum::<u64>();

let ctx = selector.calculate_level_base_size(&levels);
Expand All @@ -555,7 +555,7 @@ pub mod tests {
levels.levels[0].total_file_size = levels.levels[0]
.table_infos
.iter()
.map(|sst| sst.file_size)
.map(|sst| sst.sst_size)
.sum::<u64>();

let ctx = selector.calculate_level_base_size(&levels);
Expand Down
27 changes: 16 additions & 11 deletions src/meta/src/hummock/compaction/selector/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -136,11 +136,11 @@ pub mod tests {
use crate::hummock::test_utils::iterator_test_key_of_epoch;

pub fn push_table_level0_overlapping(levels: &mut Levels, sst: SstableInfo) {
levels.l0.total_file_size += sst.file_size;
levels.l0.total_file_size += sst.sst_size;
levels.l0.sub_levels.push(Level {
level_idx: 0,
level_type: LevelType::Overlapping,
total_file_size: sst.file_size,
total_file_size: sst.sst_size,
uncompressed_file_size: sst.uncompressed_file_size,
sub_level_id: sst.sst_id,
table_infos: vec![sst],
Expand All @@ -154,7 +154,7 @@ pub mod tests {
}

pub fn push_tables_level0_nonoverlapping(levels: &mut Levels, table_infos: Vec<SstableInfo>) {
let total_file_size = table_infos.iter().map(|table| table.file_size).sum::<u64>();
let total_file_size = table_infos.iter().map(|table| table.sst_size).sum::<u64>();
let uncompressed_file_size = table_infos
.iter()
.map(|table| table.uncompressed_file_size)
Expand All @@ -179,6 +179,7 @@ pub mod tests {
right: usize,
epoch: u64,
) -> SstableInfo {
let object_size = (right - left + 1) as u64;
SstableInfo {
object_id: id,
sst_id: id,
Expand All @@ -187,10 +188,11 @@ pub mod tests {
right: iterator_test_key_of_epoch(table_prefix, right, epoch).into(),
right_exclusive: false,
},
file_size: (right - left + 1) as u64,
file_size: object_size,
table_ids: vec![table_prefix as u32],
uncompressed_file_size: (right - left + 1) as u64,
total_key_count: (right - left + 1) as u64,
sst_size: object_size,
..Default::default()
}
}
Expand All @@ -206,6 +208,7 @@ pub mod tests {
min_epoch: u64,
max_epoch: u64,
) -> SstableInfo {
let object_size = (right - left + 1) as u64;
SstableInfo {
object_id: id,
sst_id: id,
Expand All @@ -214,11 +217,12 @@ pub mod tests {
right: iterator_test_key_of_epoch(table_prefix, right, epoch).into(),
right_exclusive: false,
},
file_size: (right - left + 1) as u64,
file_size: object_size,
table_ids,
uncompressed_file_size: (right - left + 1) as u64,
uncompressed_file_size: object_size,
min_epoch,
max_epoch,
sst_size: object_size,
..Default::default()
}
}
Expand All @@ -235,14 +239,15 @@ pub mod tests {
for id in ids {
let mut table = generate_table(id, 1, start, start + step - 1, epoch);
table.file_size = file_size;
table.sst_size = file_size;
tables.push(table);
start += step;
}
tables
}

pub fn generate_level(level_idx: u32, table_infos: Vec<SstableInfo>) -> Level {
let total_file_size = table_infos.iter().map(|sst| sst.file_size).sum();
let total_file_size = table_infos.iter().map(|sst| sst.sst_size).sum();
let uncompressed_file_size = table_infos
.iter()
.map(|sst| sst.uncompressed_file_size)
Expand All @@ -261,7 +266,7 @@ pub mod tests {
/// Returns an `OverlappingLevel`, with each element of `table_infos` placed in a nonoverlapping
/// sub-level.
pub fn generate_l0_nonoverlapping_sublevels(table_infos: Vec<SstableInfo>) -> OverlappingLevel {
let total_file_size = table_infos.iter().map(|table| table.file_size).sum::<u64>();
let total_file_size = table_infos.iter().map(|table| table.sst_size).sum::<u64>();
let uncompressed_file_size = table_infos
.iter()
.map(|table| table.uncompressed_file_size)
Expand All @@ -273,7 +278,7 @@ pub mod tests {
.map(|(idx, table)| Level {
level_idx: 0,
level_type: LevelType::Nonoverlapping,
total_file_size: table.file_size,
total_file_size: table.sst_size,
uncompressed_file_size: table.uncompressed_file_size,
sub_level_id: idx as u64,
table_infos: vec![table],
Expand All @@ -295,7 +300,7 @@ pub mod tests {
.map(|(idx, table)| Level {
level_idx: 0,
level_type: LevelType::Nonoverlapping,
total_file_size: table.iter().map(|table| table.file_size).sum::<u64>(),
total_file_size: table.iter().map(|table| table.sst_size).sum::<u64>(),
uncompressed_file_size: table
.iter()
.map(|sst| sst.uncompressed_file_size)
Expand Down Expand Up @@ -330,7 +335,7 @@ pub mod tests {
.map(|(idx, table)| Level {
level_idx: 0,
level_type: LevelType::Overlapping,
total_file_size: table.iter().map(|table| table.file_size).sum::<u64>(),
total_file_size: table.iter().map(|table| table.sst_size).sum::<u64>(),
sub_level_id: idx as u64,
table_infos: table.clone(),
uncompressed_file_size: table
Expand Down
2 changes: 1 addition & 1 deletion src/meta/src/hummock/level_handler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ impl LevelHandler {
let mut total_file_size = 0;
for sst in ssts {
self.compacting_files.insert(sst.sst_id, task_id);
total_file_size += sst.file_size;
total_file_size += sst.sst_size;
table_ids.push(sst.sst_id);
}

Expand Down
2 changes: 1 addition & 1 deletion src/meta/src/hummock/manager/compaction.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1508,7 +1508,7 @@ impl HummockManager {
existing_table_ids.extend(sst.table_ids.iter());
for table_id in &sst.table_ids {
*table_size_info.entry(*table_id).or_default() +=
sst.file_size / (sst.table_ids.len() as u64);
sst.sst_size / (sst.table_ids.len() as u64);
}
}
}
Expand Down
Loading

0 comments on commit 4e3b9ff

Please sign in to comment.