From 838300ef85932bd0157683b84cc3d1453d926814 Mon Sep 17 00:00:00 2001 From: Li0k Date: Fri, 31 May 2024 15:28:41 +0800 Subject: [PATCH] feat(compaction): adjust target_file_base calculation for base level (#17022) --- src/meta/src/hummock/compaction/mod.rs | 6 ------ src/meta/src/hummock/manager/compaction.rs | 10 +++------- 2 files changed, 3 insertions(+), 13 deletions(-) diff --git a/src/meta/src/hummock/compaction/mod.rs b/src/meta/src/hummock/compaction/mod.rs index 49eeaa5778ab7..1d1c2fe4b99fb 100644 --- a/src/meta/src/hummock/compaction/mod.rs +++ b/src/meta/src/hummock/compaction/mod.rs @@ -193,12 +193,6 @@ pub fn create_compaction_task( ) -> CompactionTask { let target_file_size = if input.target_level == 0 { compaction_config.target_file_size_base - } else if input.target_level == base_level { - // This is just a temporary optimization measure. We hope to reduce the size of SST as much - // as possible to reduce the amount of data blocked by a single task during compaction, - // but too many files will increase computing overhead. - // TODO: remove it after can reduce configuration `target_file_size_base`. - compaction_config.target_file_size_base / 4 } else { assert!(input.target_level >= base_level); let step = (input.target_level - base_level) / 2; diff --git a/src/meta/src/hummock/manager/compaction.rs b/src/meta/src/hummock/manager/compaction.rs index 16ca79a30962d..de68301014014 100644 --- a/src/meta/src/hummock/manager/compaction.rs +++ b/src/meta/src/hummock/manager/compaction.rs @@ -831,13 +831,6 @@ impl HummockManager { .latest_version() .safe_epoch_table_watermarks(&compact_task.existing_table_ids); - // do not split sst by vnode partition when target_level > base_level - // The purpose of data alignment is mainly to improve the parallelism of base level compaction and reduce write amplification. - // However, at high level, the size of the sst file is often larger and only contains the data of a single table_id, so there is no need to cut it. 
- if compact_task.target_level > compact_task.base_level { - compact_task.table_vnode_partition.clear(); - } - if self.env.opts.enable_dropped_column_reclaim { // TODO: get all table schemas for all tables in once call to avoid acquiring lock and await. compact_task.table_schemas = match self.metadata_manager() { @@ -1455,6 +1448,9 @@ impl HummockManager { compact_task: &mut CompactTask, compaction_config: &CompactionConfig, ) { + // Do not split SSTs by vnode partition when target_level > base_level. + // The purpose of data alignment is mainly to improve the parallelism of base-level compaction and to reduce write amplification. + // However, at higher levels, SST files are often larger and contain the data of only a single table_id, so there is no need to split them. if compact_task.target_level > compact_task.base_level { return; }