Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(meta): do not split by vnode for low write throughput #12534

Merged
merged 27 commits into from
Jan 10, 2024
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
3dcff6b
do not split too many files
Little-Wallace Sep 26, 2023
35d542f
fix config
Little-Wallace Sep 26, 2023
af781bc
fix test
Little-Wallace Sep 26, 2023
8f440fe
Merge branch 'main' into wallace/partition-vnode
Little-Wallace Sep 26, 2023
a6a5f28
fix conflict
Little-Wallace Sep 27, 2023
2a0d45e
Merge branch 'main' into wallace/partition-vnode
Little-Wallace Oct 9, 2023
4be7e79
Merge branch 'main' into wallace/partition-vnode
Little-Wallace Oct 18, 2023
950ed96
fix metrics
Little-Wallace Oct 19, 2023
cca903b
fix score
Little-Wallace Oct 20, 2023
dc4445f
fix base level compact
Little-Wallace Oct 20, 2023
199038a
fix score
Little-Wallace Oct 25, 2023
4c144ea
do not pick no partition level
Little-Wallace Oct 25, 2023
b8b7761
add warn log
Little-Wallace Oct 26, 2023
fc2661d
fix trivial move
Little-Wallace Oct 26, 2023
c3db7bc
remove log
Little-Wallace Oct 27, 2023
ee22ef5
remove warn
Little-Wallace Oct 27, 2023
5f493ba
fix check
Little-Wallace Oct 27, 2023
50e9572
Merge branch 'main' into wallace/partition-vnode
Little-Wallace Oct 27, 2023
f1e6aaf
address comment
Little-Wallace Oct 31, 2023
cc4289a
Merge branch 'main' into wallace/partition-vnode
Little-Wallace Nov 14, 2023
e466a1d
Merge branch 'main' into wallace/partition-vnode
Little-Wallace Dec 13, 2023
856bf4d
fix conflict
Little-Wallace Dec 13, 2023
d0eee2d
fix test
Little-Wallace Dec 13, 2023
6cf7c83
add log for skip by write amp
Little-Wallace Dec 15, 2023
4805b7f
do not wait too much files
Little-Wallace Dec 15, 2023
b3433d5
fix ut
Little-Wallace Dec 18, 2023
a28859c
Merge branch 'main' into wallace/partition-vnode
Little-Wallace Jan 10, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions proto/hummock.proto
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ message Level {
uint64 total_file_size = 4;
uint64 sub_level_id = 5;
uint64 uncompressed_file_size = 6;
uint32 vnode_partition_count = 7;
}

message InputLevel {
Expand All @@ -62,6 +63,7 @@ message IntraLevelDelta {
uint64 l0_sub_level_id = 2;
repeated uint64 removed_table_ids = 3;
repeated SstableInfo inserted_table_infos = 4;
uint32 vnode_partition_count = 5;
}

enum CompatibilityVersion {
Expand Down Expand Up @@ -116,6 +118,7 @@ message HummockVersion {
uint64 group_id = 3;
uint64 parent_group_id = 4;
repeated uint32 member_table_ids = 5;
uint32 vnode_partition_count = 6;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would you explain when this field is initialized / set?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is set in the function build_initial_compaction_group_levels.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So for existing compaction groups, vnode_partition_count is 0 and cannot be changed.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🤔 Good catch.
I will refactor this code to avoid compatibility issues.

}
uint64 id = 1;
// Levels of each compaction group
Expand Down
2 changes: 1 addition & 1 deletion src/common/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -948,7 +948,7 @@ pub mod default {
}

pub fn partition_vnode_count() -> u32 {
64
16
}

pub fn table_write_throughput_threshold() -> u64 {
Expand Down
3 changes: 1 addition & 2 deletions src/config/example.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ periodic_split_compact_group_interval_sec = 180
move_table_size_limit = 10737418240
split_group_size_limit = 68719476736
do_not_config_object_storage_lifecycle = false
partition_vnode_count = 64
partition_vnode_count = 16
table_write_throughput_threshold = 16777216
min_table_split_write_throughput = 4194304
compaction_task_max_heartbeat_interval_secs = 60
Expand Down Expand Up @@ -111,7 +111,6 @@ compact_iter_recreate_timeout_ms = 600000
compactor_max_sst_size = 536870912
enable_fast_compaction = true


[storage.data_file_cache]
dir = ""
capacity_mb = 1024
Expand Down
2 changes: 1 addition & 1 deletion src/meta/src/hummock/compaction/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ impl CompactStatus {
target_sub_level_id: ret.input.target_sub_level_id,
task_type: ret.compaction_task_type as i32,
split_by_state_table: group.compaction_config.split_by_state_table,
split_weight_by_vnode: group.compaction_config.split_weight_by_vnode,
split_weight_by_vnode: ret.input.vnode_partition_count,
zwang28 marked this conversation as resolved.
Show resolved Hide resolved
};
Some(compact_task)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,19 +60,21 @@ impl CompactionPicker for LevelCompactionPicker {
return None;
}

if let Some(ret) = self.pick_base_trivial_move(
if let Some(mut ret) = self.pick_base_trivial_move(
l0,
levels.get_level(self.target_level),
level_handlers,
stats,
) {
ret.vnode_partition_count = levels.vnode_partition_count;
return Some(ret);
}

debug_assert!(self.target_level == levels.get_level(self.target_level).level_idx as usize);
if let Some(ret) = self.pick_multi_level_to_base(
l0,
levels.get_level(self.target_level),
levels.vnode_partition_count,
level_handlers,
stats,
) {
Expand Down Expand Up @@ -128,6 +130,7 @@ impl LevelCompactionPicker {
&self,
l0: &OverlappingLevel,
target_level: &Level,
vnode_partition_count: u32,
level_handlers: &[LevelHandler],
stats: &mut LocalPickerStatistic,
) -> Option<CompactionInput> {
Expand All @@ -147,8 +150,18 @@ impl LevelCompactionPicker {
overlap_strategy.clone(),
);

let l0_select_tables_vec = non_overlap_sub_level_picker
.pick_l0_multi_non_overlap_level(&l0.sub_levels, &level_handlers[0]);
let mut max_vnode_partition_idx = 0;
for (idx, level) in l0.sub_levels.iter().enumerate() {
if level.vnode_partition_count < vnode_partition_count {
break;
}
max_vnode_partition_idx = idx;
}

let l0_select_tables_vec = non_overlap_sub_level_picker.pick_l0_multi_non_overlap_level(
&l0.sub_levels[..=max_vnode_partition_idx],
&level_handlers[0],
);
if l0_select_tables_vec.is_empty() {
stats.skip_by_pending_files += 1;
return None;
Expand Down Expand Up @@ -217,6 +230,7 @@ impl LevelCompactionPicker {
select_input_size: input.total_file_size,
target_input_size: target_file_size,
total_file_count: (input.total_file_count + target_file_count) as u64,
vnode_partition_count,
..Default::default()
};

Expand Down Expand Up @@ -423,6 +437,7 @@ pub mod tests {
total_file_size: 0,
sub_level_id: 0,
uncompressed_file_size: 0,
..Default::default()
}];
let mut levels = Levels {
levels,
Expand Down Expand Up @@ -487,6 +502,7 @@ pub mod tests {
total_file_size: 900,
sub_level_id: 0,
uncompressed_file_size: 900,
..Default::default()
}],
l0: Some(generate_l0_nonoverlapping_sublevels(vec![])),
..Default::default()
Expand Down
131 changes: 116 additions & 15 deletions src/meta/src/hummock/compaction/picker/intra_compaction_picker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,16 @@ impl CompactionPicker for IntraCompactionPicker {
return None;
}

if let Some(ret) = self.pick_l0_intra(l0, &level_handlers[0], stats) {
let vnode_partition_count = levels.vnode_partition_count;

if let Some(ret) =
self.pick_whole_level(l0, &level_handlers[0], vnode_partition_count, stats)
{
return Some(ret);
}

if let Some(ret) = self.pick_l0_intra(l0, &level_handlers[0], vnode_partition_count, stats)
{
return Some(ret);
}

Expand All @@ -84,13 +93,102 @@ impl IntraCompactionPicker {
}
}

/// Tries to build an intra-L0 compaction input from a run of consecutive sub-levels.
///
/// A run may only start at a non-overlapping sub-level whose `vnode_partition_count`
/// differs from `partition_count`. From that start, following sub-levels are appended
/// until one of the stop conditions below is hit.
///
/// The emitted input carries `partition_count` as its `vnode_partition_count` only when
/// the selected bytes exceed half of `sub_level_max_compaction_bytes`; otherwise it
/// carries 0.
///
/// Returns `None` when `partition_count` is 0, or when no start sub-level produces an
/// input that is accepted.
fn pick_whole_level(
    &self,
    l0: &OverlappingLevel,
    level_handler: &LevelHandler,
    partition_count: u32,
    stats: &mut LocalPickerStatistic,
) -> Option<CompactionInput> {
    if partition_count == 0 {
        return None;
    }

    for (start_idx, start_level) in l0.sub_levels.iter().enumerate() {
        // A run can only begin at a non-overlapping sub-level whose partition count
        // is not already equal to the requested one.
        let can_start = start_level.level_type() == LevelType::Nonoverlapping
            && start_level.vnode_partition_count != partition_count;
        if !can_start {
            continue;
        }

        // Byte budget for one task: the larger of the base-level size and the
        // per-sub-level budget multiplied by the configured sub-level count.
        let size_budget = self.config.max_bytes_for_level_base.max(
            self.config.sub_level_max_compaction_bytes
                * (self.config.level0_sub_level_compact_level_count as u64),
        );

        let mut picked_bytes = 0;
        let mut picked_files = 0;
        let mut picked_levels = vec![];
        let mut wait_enough = false;

        for candidate in &l0.sub_levels[start_idx..] {
            let over_size = picked_bytes > size_budget;
            let over_files = picked_files > self.config.level0_max_compact_file_number;
            let reached_partitioned = candidate.vnode_partition_count == partition_count
                && picked_levels.len() > 1;

            // Any of these means the run is considered large enough to be emitted
            // without consulting the task validator.
            if over_size || over_files || reached_partitioned {
                wait_enough = true;
                break;
            }

            // A pending sub-level ends the run, but what was gathered so far still
            // has to pass validation.
            if level_handler.is_level_pending_compact(candidate) {
                break;
            }

            picked_bytes += candidate.total_file_size;
            picked_files += candidate.table_infos.len() as u64;
            picked_levels.push(InputLevel {
                level_idx: 0,
                level_type: candidate.level_type,
                table_infos: candidate.table_infos.clone(),
            });
        }

        if picked_levels.is_empty() {
            continue;
        }

        let vnode_partition_count =
            if picked_bytes <= self.config.sub_level_max_compaction_bytes / 2 {
                0
            } else {
                partition_count
            };

        let input = CompactionInput {
            input_levels: picked_levels,
            target_sub_level_id: start_level.sub_level_id,
            select_input_size: picked_bytes,
            total_file_count: picked_files,
            vnode_partition_count,
            ..Default::default()
        };

        // Short-circuit on purpose: the validator (which receives `stats`) is only
        // consulted when the run was not already marked as large enough.
        let accepted = wait_enough
            || self.compaction_task_validator.valid_compact_task(
                &input,
                ValidationRuleType::Intra,
                stats,
            );
        if accepted {
            return Some(input);
        }
    }

    None
}

fn pick_l0_intra(
&self,
l0: &OverlappingLevel,
level_handler: &LevelHandler,
vnode_partition_count: u32,
stats: &mut LocalPickerStatistic,
) -> Option<CompactionInput> {
let overlap_strategy = create_overlap_strategy(self.config.compaction_mode());
let mut max_vnode_partition_idx = 0;
for (idx, level) in l0.sub_levels.iter().enumerate() {
if level.vnode_partition_count < vnode_partition_count {
break;
}
max_vnode_partition_idx = idx;
}

for (idx, level) in l0.sub_levels.iter().enumerate() {
if level.level_type() != LevelType::Nonoverlapping
Expand All @@ -99,6 +197,10 @@ impl IntraCompactionPicker {
continue;
}

if idx > max_vnode_partition_idx {
break;
}

if level_handler.is_level_all_pending_compact(level) {
continue;
}
Expand All @@ -117,7 +219,10 @@ impl IntraCompactionPicker {
);

let l0_select_tables_vec = non_overlap_sub_level_picker
.pick_l0_multi_non_overlap_level(&l0.sub_levels[idx..], level_handler);
.pick_l0_multi_non_overlap_level(
&l0.sub_levels[idx..=max_vnode_partition_idx],
level_handler,
);

if l0_select_tables_vec.is_empty() {
continue;
Expand Down Expand Up @@ -192,6 +297,12 @@ impl IntraCompactionPicker {
continue;
}

if l0.sub_levels[idx + 1].vnode_partition_count
!= l0.sub_levels[idx].vnode_partition_count
{
continue;
}

let trivial_move_picker = TrivialMovePicker::new(0, 0, overlap_strategy.clone());

let select_sst = trivial_move_picker.pick_trivial_move_sst(
Expand Down Expand Up @@ -281,14 +392,11 @@ pub mod tests {
fn test_l0_to_l1_compact_conflict() {
// When picking L0->L1, L0's selecting_key_range should not be overlapped with L0's
// compacting_key_range.
let mut picker = create_compaction_picker_for_test();
let levels = vec![Level {
level_idx: 1,
level_type: LevelType::Nonoverlapping as i32,
table_infos: vec![],
total_file_size: 0,
sub_level_id: 0,
uncompressed_file_size: 0,
..Default::default()
}];
let mut levels = Levels {
levels,
Expand All @@ -307,14 +415,9 @@ pub mod tests {
generate_table(2, 1, 350, 500, 2),
],
);
let mut levels_handler = vec![LevelHandler::new(0), LevelHandler::new(1)];
let levels_handler = vec![LevelHandler::new(0), LevelHandler::new(1)];

let mut local_stats = LocalPickerStatistic::default();
let ret = picker
.pick_compaction(&levels, &levels_handler, &mut local_stats)
.unwrap();
// trivial_move
ret.add_pending_task(0, &mut levels_handler); // pending only for test
push_tables_level0_nonoverlapping(&mut levels, vec![generate_table(3, 1, 250, 300, 3)]);
let config: CompactionConfig = CompactionConfigBuilder::new()
.level0_tier_compact_file_number(2)
Expand All @@ -341,9 +444,7 @@ pub mod tests {
level_idx: 1,
level_type: LevelType::Nonoverlapping as i32,
table_infos: vec![generate_table(3, 1, 200, 300, 2)],
total_file_size: 0,
sub_level_id: 0,
uncompressed_file_size: 0,
..Default::default()
}],
l0: Some(generate_l0_nonoverlapping_sublevels(vec![
generate_table(1, 1, 100, 210, 2),
Expand Down
Loading
Loading