Skip to content

Commit

Permalink
fix large lsm tree
Browse files Browse the repository at this point in the history
Signed-off-by: Little-Wallace <[email protected]>
  • Loading branch information
Little-Wallace committed Apr 23, 2024
1 parent 1657ab0 commit 19e6fb9
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 14 deletions.
4 changes: 3 additions & 1 deletion src/meta/src/hummock/compaction/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,9 @@ pub fn create_compaction_task(
) -> CompactionTask {
let target_file_size = if input.target_level == 0 {
compaction_config.target_file_size_base
} else if input.target_level == base_level {
} else if input.target_level == base_level
&& input.target_input_size <= compaction_config.max_bytes_for_level_base * 2
{
// This is just a temporary optimization measure. We hope to reduce the size of SST as much
// as possible to reduce the amount of data blocked by a single task during compaction,
// but too many files will increase computing overhead.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ use crate::hummock::compaction::picker::TrivialMovePicker;
use crate::hummock::compaction::{create_overlap_strategy, CompactionDeveloperConfig};
use crate::hummock::level_handler::LevelHandler;

const L0_MAX_SIZE: u64 = 32 * 1024 * 1024 * 1024; // 32GB

pub struct LevelCompactionPicker {
target_level: usize,
config: Arc<CompactionConfig>,
Expand Down Expand Up @@ -148,14 +150,22 @@ impl LevelCompactionPicker {
) -> Option<CompactionInput> {
let overlap_strategy = create_overlap_strategy(self.config.compaction_mode());
let min_compaction_bytes = self.config.sub_level_max_compaction_bytes;
let non_overlap_sub_level_picker = NonOverlapSubLevelPicker::new(
min_compaction_bytes,
let max_compaction_bytes = if l0.total_file_size > L0_MAX_SIZE {
std::cmp::max(
target_level.total_file_size,
self.config.max_compaction_bytes * 4,
)
} else {
// divide by 2 because we need to select files of base level and it need use the other
// half quota.
std::cmp::max(
self.config.max_bytes_for_level_base,
self.config.max_compaction_bytes / 2,
),
)
};
let non_overlap_sub_level_picker = NonOverlapSubLevelPicker::new(
min_compaction_bytes,
max_compaction_bytes,
1,
// The maximum number of sub_level compact level per task
self.config.level0_max_compact_file_number,
Expand Down
60 changes: 50 additions & 10 deletions src/meta/src/hummock/compaction/selector/level_selector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -180,11 +180,11 @@ impl DynamicLevelSelectorCore {
base_bytes_min + 1
} else {
ctx.base_level = first_non_empty_level;
while ctx.base_level > 1 && cur_level_size > base_bytes_max {
while ctx.base_level > 1 && cur_level_size > base_bytes_max * 2 {
ctx.base_level -= 1;
cur_level_size /= self.config.max_bytes_for_level_multiplier;
}
std::cmp::min(base_bytes_max, cur_level_size)
cur_level_size
};

let level_multiplier = self.config.max_bytes_for_level_multiplier as f64;
Expand Down Expand Up @@ -314,14 +314,9 @@ impl DynamicLevelSelectorCore {
if level_idx < ctx.base_level || level_idx >= self.config.max_level as usize {
continue;
}
let upper_level = if level_idx == ctx.base_level {
0
} else {
level_idx - 1
};
let total_size = level.total_file_size
+ handlers[upper_level].get_pending_output_file_size(level.level_idx)
- handlers[level_idx].get_pending_output_file_size(level.level_idx + 1);
let total_size = level.total_file_size.saturating_sub(
handlers[level_idx].get_pending_output_file_size(level.level_idx + 1),
);
if total_size == 0 {
continue;
}
Expand Down Expand Up @@ -756,4 +751,49 @@ pub mod tests {
let compact_pending_bytes = dynamic_level_core.compact_pending_bytes_needed(&levels);
assert_eq!(24400 + 40110 + 47281, compact_pending_bytes);
}

#[test]
fn test_compact_score() {
let config = CompactionConfigBuilder::new().build();
let mut levels = vec![
generate_level(1, generate_tables(1..2, 0..1000, 3, 500)),
generate_level(2, generate_tables(2..3, 0..1000, 3, 500)),
generate_level(3, generate_tables(3..4, 0..1000, 2, 500)),
generate_level(4, generate_tables(4..5, 0..1000, 1, 1000)),
generate_level(5, generate_tables(5..6, 0..1000, 1, 1000)),
generate_level(6, generate_tables(6..7, 0..1000, 1, 1000)),
];
levels[0].total_file_size = 2048 * 1024 * 1024;
levels[1].total_file_size = 5 * 2048 * 1024 * 1024;
levels[2].total_file_size = 25 * 2048 * 1024 * 1024;
levels[3].total_file_size = 125 * 2048 * 1024 * 1024;
levels[4].total_file_size = 625 * 2048 * 1024 * 1024;
levels[5].total_file_size = 3125 * 2048 * 1024 * 1024;
let levels = Levels {
levels,
l0: Some(generate_l0_nonoverlapping_sublevels(generate_tables(
15..25,
0..600,
3,
100,
))),
..Default::default()
};
let config = Arc::new(config);
let dynamic_level_core = DynamicLevelSelectorCore::new(
config.clone(),
Arc::new(CompactionDeveloperConfig::default()),
);
let ctx = dynamic_level_core.calculate_level_base_size(&levels);
assert!(ctx.level_max_bytes[0] > config.max_bytes_for_level_base);
for (idx, sz) in ctx.level_max_bytes.iter().enumerate() {
println!("level[{}]: {}MB", idx, *sz / 1024 / 1024);
}
let levels_handlers = (0..7).map(LevelHandler::new).collect_vec();
let ctx = dynamic_level_core.get_priority_levels(&levels, &levels_handlers);
for info in &ctx.score_levels {
assert_eq!(info.score, 100);
}

}
}

0 comments on commit 19e6fb9

Please sign in to comment.