feat(storage): optimize data alignment for default compaction group #13075

Merged 19 commits on Dec 1, 2023
Changes from all commits
19 commits
62a9cdf
feat(storage): optimize data alignment for default compaction group
Li0k Oct 26, 2023
e3e51d0
Merge branch 'main' of https://github.com/risingwavelabs/risingwave i…
Li0k Oct 26, 2023
9893724
chore(storage): remove split by table
Li0k Oct 31, 2023
0e21215
Merge branch 'main' of https://github.com/risingwavelabs/risingwave i…
Li0k Oct 31, 2023
98779dd
fix(storage): fix test
Li0k Oct 31, 2023
3ef2635
fix(storage): fix comments
Li0k Nov 1, 2023
e2d79f8
Merge branch 'main' of https://github.com/risingwavelabs/risingwave i…
Li0k Nov 13, 2023
1003115
fix(storage): update branch
Li0k Nov 13, 2023
9e2c49e
Merge branch 'main' of https://github.com/risingwavelabs/risingwave i…
Li0k Nov 13, 2023
3e0959c
chore(storage): address comments
Li0k Nov 20, 2023
c0e1121
Merge branch 'main' of https://github.com/risingwavelabs/risingwave i…
Li0k Nov 20, 2023
e58d6f1
fix(storage): fix unit-test
Li0k Nov 20, 2023
b8a664e
Merge branch 'main' of https://github.com/risingwavelabs/risingwave i…
Li0k Nov 21, 2023
6b56485
refactor(storage): refactor code and fix comments
Li0k Nov 27, 2023
ec2210b
Merge branch 'main' of https://github.com/risingwavelabs/risingwave i…
Li0k Nov 27, 2023
6f2b69e
Merge branch 'main' of https://github.com/risingwavelabs/risingwave i…
Li0k Nov 28, 2023
3833fe7
chore(storage): add docs
Li0k Nov 30, 2023
10f960d
Merge branch 'main' of https://github.com/risingwavelabs/risingwave i…
Li0k Nov 30, 2023
840dca3
Merge branch 'main' of https://github.com/risingwavelabs/risingwave i…
Li0k Dec 1, 2023
8 changes: 6 additions & 2 deletions proto/hummock.proto
@@ -307,9 +307,13 @@ message CompactTask {

// Identifies whether the task is space_reclaim, if the compact_task_type increases, it will be refactored to enum
TaskType task_type = 20;
bool split_by_state_table = 21;

// Deprecated. use table_vnode_partition instead;
bool split_by_state_table = 21 [deprecated = true];
// Compaction needs to cut the state table every time 1/weight of vnodes in the table have been processed.
uint32 split_weight_by_vnode = 22;
// Deprecated. use table_vnode_partition instead;
uint32 split_weight_by_vnode = 22 [deprecated = true];
map<uint32, uint32> table_vnode_partition = 23;
}

message LevelHandler {
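The new `table_vnode_partition` map replaces the two deprecated per-group flags with a per-table setting: each entry maps a state table id to the number of vnode partitions its data should be cut into during compaction. A minimal sketch of how a consumer might read such a map; the helper name and the fallback behaviour are assumptions for illustration, not the compactor's actual code:

```rust
use std::collections::BTreeMap;

/// Hypothetical helper: look up how many vnode partitions a state table's
/// data should be cut into, falling back to a default when the compact task
/// carries no explicit entry for the table.
fn partition_count(
    table_vnode_partition: &BTreeMap<u32, u32>,
    table_id: u32,
    default: u32,
) -> u32 {
    table_vnode_partition
        .get(&table_id)
        .copied()
        .unwrap_or(default)
}

fn main() {
    let mut table_vnode_partition = BTreeMap::new();
    table_vnode_partition.insert(42, 4); // hypothetical table 42: cut into 4 vnode partitions

    assert_eq!(partition_count(&table_vnode_partition, 42, 1), 4);
    assert_eq!(partition_count(&table_vnode_partition, 7, 1), 1); // unlisted tables keep the default
}
```

Keeping the deprecated fields in place while adding field 23 preserves protobuf wire compatibility for nodes that still populate the old flags.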
15 changes: 14 additions & 1 deletion src/common/src/config.rs
@@ -266,6 +266,9 @@ pub struct MetaConfig {
#[serde(default = "default::meta::split_group_size_limit")]
pub split_group_size_limit: u64,

#[serde(default = "default::meta::cut_table_size_limit")]
pub cut_table_size_limit: u64,

#[serde(default, flatten)]
pub unrecognized: Unrecognized<Self>,

@@ -292,6 +295,8 @@ pub struct MetaConfig {
#[serde(default)]
pub compaction_config: CompactionConfig,

#[serde(default = "default::meta::hybird_partition_vnode_count")]
pub hybird_partition_vnode_count: u32,
#[serde(default = "default::meta::event_log_enabled")]
pub event_log_enabled: bool,
/// Keeps the latest N events per channel.
@@ -973,7 +978,7 @@ pub mod default {
}

pub fn periodic_split_compact_group_interval_sec() -> u64 {
180 // 3mi
10 // 10s
}

pub fn periodic_tombstone_reclaim_compaction_interval_sec() -> u64 {
@@ -1004,6 +1009,14 @@
60 // 1min
}

pub fn cut_table_size_limit() -> u64 {
1024 * 1024 * 1024 // 1GB
}

pub fn hybird_partition_vnode_count() -> u32 {
4
}

pub fn event_log_enabled() -> bool {
true
}
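The two new meta knobs default to 1 GiB (`cut_table_size_limit`) and 4 (`hybird_partition_vnode_count`). A hedged sketch of how they could interact when the meta node builds the per-table partition map for the default (hybrid) compaction group: tables whose data size exceeds the cut limit get the hybrid partition count, everything else stays unpartitioned. The function and selection rule below are illustrative assumptions, not the exact logic in this PR:

```rust
use std::collections::BTreeMap;

const CUT_TABLE_SIZE_LIMIT: u64 = 1024 * 1024 * 1024; // 1GiB, mirrors the default above
const HYBIRD_PARTITION_VNODE_COUNT: u32 = 4; // mirrors the default above

/// Illustrative sketch: assign a vnode-partition count to each table in the
/// default compaction group based on its approximate data size.
fn build_table_vnode_partition(table_sizes: &BTreeMap<u32, u64>) -> BTreeMap<u32, u32> {
    table_sizes
        .iter()
        .filter(|(_, size)| **size >= CUT_TABLE_SIZE_LIMIT)
        .map(|(table_id, _)| (*table_id, HYBIRD_PARTITION_VNODE_COUNT))
        .collect()
}

fn main() {
    // Hypothetical tables: id 1 holds ~64MiB, id 2 holds ~2GiB.
    let sizes = BTreeMap::from([(1u32, 64u64 << 20), (2u32, 2u64 << 30)]);
    let partitions = build_table_vnode_partition(&sizes);
    assert!(!partitions.contains_key(&1)); // small table: no cut
    assert_eq!(partitions.get(&2), Some(&4)); // large table: 4 vnode partitions
}
```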
4 changes: 3 additions & 1 deletion src/config/example.toml
@@ -34,14 +34,16 @@ backend = "Mem"
periodic_space_reclaim_compaction_interval_sec = 3600
periodic_ttl_reclaim_compaction_interval_sec = 1800
periodic_tombstone_reclaim_compaction_interval_sec = 600
periodic_split_compact_group_interval_sec = 180
periodic_split_compact_group_interval_sec = 10
Collaborator:

Is this change expected?

@Li0k (Contributor, Author), Nov 20, 2023:

Yes, I think this check should be more sensitive (for backfill). What do you think?

move_table_size_limit = 10737418240
split_group_size_limit = 68719476736
cut_table_size_limit = 1073741824
do_not_config_object_storage_lifecycle = false
partition_vnode_count = 64
table_write_throughput_threshold = 16777216
min_table_split_write_throughput = 4194304
compaction_task_max_heartbeat_interval_secs = 60
hybird_partition_vnode_count = 4
event_log_enabled = true
event_log_channel_max_size = 10

2 changes: 2 additions & 0 deletions src/meta/node/src/lib.rs
@@ -297,6 +297,8 @@ pub fn start(opts: MetaNodeOpts) -> Pin<Box<dyn Future<Output = ()> + Send>> {
.meta
.compaction_task_max_heartbeat_interval_secs,
compaction_config: Some(config.meta.compaction_config),
cut_table_size_limit: config.meta.cut_table_size_limit,
hybird_partition_vnode_count: config.meta.hybird_partition_vnode_count,
event_log_enabled: config.meta.event_log_enabled,
event_log_channel_max_size: config.meta.event_log_channel_max_size,
advertise_addr: opts.advertise_addr,
6 changes: 2 additions & 4 deletions src/meta/src/hummock/compaction/mod.rs
@@ -78,7 +78,6 @@ pub struct CompactionTask {
pub compression_algorithm: String,
pub target_file_size: u64,
pub compaction_task_type: compact_task::TaskType,
pub enable_split_by_table: bool,
}

pub fn create_overlap_strategy(compaction_mode: CompactionMode) -> Arc<dyn OverlapStrategy> {
@@ -149,8 +148,8 @@ impl CompactStatus {
current_epoch_time: 0,
target_sub_level_id: ret.input.target_sub_level_id,
task_type: ret.compaction_task_type as i32,
split_by_state_table: group.compaction_config.split_by_state_table,
split_weight_by_vnode: group.compaction_config.split_weight_by_vnode,
table_vnode_partition: BTreeMap::default(),
..Default::default()
};
Some(compact_task)
}
@@ -238,7 +237,6 @@ pub fn create_compaction_task(
input,
target_file_size,
compaction_task_type,
enable_split_by_table: false,
}
}

src/meta/src/hummock/compaction/picker/min_overlap_compaction_picker.rs
@@ -28,7 +28,6 @@ pub struct MinOverlappingPicker {
level: usize,
target_level: usize,
max_select_bytes: u64,
split_by_table: bool,
overlap_strategy: Arc<dyn OverlapStrategy>,
}

@@ -37,14 +36,12 @@ impl MinOverlappingPicker {
level: usize,
target_level: usize,
max_select_bytes: u64,
split_by_table: bool,
overlap_strategy: Arc<dyn OverlapStrategy>,
) -> MinOverlappingPicker {
MinOverlappingPicker {
level,
target_level,
max_select_bytes,
split_by_table,
overlap_strategy,
}
}
@@ -66,9 +63,6 @@
if level_handlers[self.level].is_pending_compact(&table.sst_id) {
break;
}
if self.split_by_table && table.table_ids != select_tables[left].table_ids {
break;
}
if select_file_size > self.max_select_bytes {
break;
}
@@ -405,13 +399,8 @@ pub mod tests {

#[test]
fn test_compact_l1() {
let mut picker = MinOverlappingPicker::new(
1,
2,
10000,
false,
Arc::new(RangeOverlapStrategy::default()),
);
let mut picker =
MinOverlappingPicker::new(1, 2, 10000, Arc::new(RangeOverlapStrategy::default()));
let levels = vec![
Level {
level_idx: 1,
@@ -487,13 +476,8 @@ pub mod tests {

#[test]
fn test_expand_l1_files() {
let mut picker = MinOverlappingPicker::new(
1,
2,
10000,
false,
Arc::new(RangeOverlapStrategy::default()),
);
let mut picker =
MinOverlappingPicker::new(1, 2, 10000, Arc::new(RangeOverlapStrategy::default()));
let levels = vec![
Level {
level_idx: 1,
@@ -833,7 +817,7 @@ pub mod tests {
];
// no limit
let picker =
MinOverlappingPicker::new(2, 3, 1000, false, Arc::new(RangeOverlapStrategy::default()));
MinOverlappingPicker::new(2, 3, 1000, Arc::new(RangeOverlapStrategy::default()));
let (select_files, target_files) = picker.pick_tables(
&levels[1].table_infos,
&levels[2].table_infos,
1 change: 0 additions & 1 deletion src/meta/src/hummock/compaction/selector/level_selector.rs
@@ -121,7 +121,6 @@ impl DynamicLevelSelectorCore {
picker_info.select_level,
picker_info.target_level,
self.config.max_bytes_for_level_base,
self.config.split_by_state_table,
overlap_strategy,
))
}
32 changes: 22 additions & 10 deletions src/meta/src/hummock/manager/compaction_group_manager.rs
@@ -44,7 +44,7 @@ use crate::hummock::error::{Error, Result};
use crate::hummock::manager::{drop_sst, read_lock, HummockManager};
use crate::hummock::metrics_utils::remove_compaction_group_in_sst_stat;
use crate::hummock::model::CompactionGroup;
use crate::manager::{IdCategory, MetaSrvEnv};
use crate::manager::{IdCategory, MetaSrvEnv, TableId};
use crate::model::{
BTreeMapEntryTransaction, BTreeMapTransaction, MetadataModel, TableFragments, ValTransaction,
};
@@ -403,6 +403,7 @@ impl HummockManager {
{
self.try_update_write_limits(compaction_group_ids).await;
}

Ok(result)
}

@@ -440,23 +441,30 @@ impl HummockManager {
parent_group_id: CompactionGroupId,
table_ids: &[StateTableId],
) -> Result<CompactionGroupId> {
self.move_state_table_to_compaction_group(parent_group_id, table_ids, None, false, 0)
.await
let result = self
.move_state_table_to_compaction_group(parent_group_id, table_ids, None, 0)
.await?;
self.group_to_table_vnode_partition
.write()
.insert(result.0, result.1);

Ok(result.0)
}

/// move some table to another compaction-group. Create a new compaction group if it does not
/// exist.
/// TODO: Move table_to_partition in result to compaction group
#[named]
pub async fn move_state_table_to_compaction_group(
&self,
parent_group_id: CompactionGroupId,
table_ids: &[StateTableId],
target_group_id: Option<CompactionGroupId>,
allow_split_by_table: bool,
weight_split_by_vnode: u32,
) -> Result<CompactionGroupId> {
partition_vnode_count: u32,
) -> Result<(CompactionGroupId, BTreeMap<TableId, u32>)> {
let mut table_to_partition = BTreeMap::default();
if table_ids.is_empty() {
return Ok(parent_group_id);
return Ok((parent_group_id, table_to_partition));
}
let table_ids = table_ids.iter().cloned().unique().collect_vec();
let mut compaction_guard = write_lock!(self, compaction).await;
@@ -561,8 +569,7 @@ impl HummockManager {
.read()
.await
.default_compaction_config();
config.split_by_state_table = allow_split_by_table;
config.split_weight_by_vnode = weight_split_by_vnode;
config.split_weight_by_vnode = partition_vnode_count;

new_version_delta.group_deltas.insert(
new_compaction_group_id,
@@ -615,6 +622,11 @@ impl HummockManager {
insert.apply_to_txn(&mut trx).await?;
self.env.meta_store().txn(trx).await?;
insert.commit();

// Currently, only splitting out a single table_id is supported.
for table_id in table_ids {
table_to_partition.insert(table_id, partition_vnode_count);
}
} else {
self.env.meta_store().txn(trx).await?;
}
@@ -688,7 +700,7 @@ impl HummockManager {
.with_label_values(&[&parent_group_id.to_string()])
.inc();

Ok(target_compaction_group_id)
Ok((target_compaction_group_id, table_to_partition))
}

#[named]
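A rough sketch of the call pattern introduced in this file: `move_state_table_to_compaction_group` now returns both the target group id and a `table_to_partition` map, and the caller (e.g. `split_compaction_group`) records that map under the new group in `group_to_table_vnode_partition`. The types and the `record_partitions` helper below are simplified stand-ins for illustration, not the manager's real signatures:

```rust
use std::collections::BTreeMap;
use std::sync::RwLock;

type CompactionGroupId = u64;
type TableId = u32;

/// Simplified stand-in for the manager state that maps each compaction group
/// to its per-table vnode-partition counts.
struct GroupPartitions {
    group_to_table_vnode_partition: RwLock<BTreeMap<CompactionGroupId, BTreeMap<TableId, u32>>>,
}

impl GroupPartitions {
    /// Mirrors what the split path does: every table moved into the new group
    /// is assigned the same partition_vnode_count.
    fn record_partitions(
        &self,
        target_group: CompactionGroupId,
        table_ids: &[TableId],
        partition_vnode_count: u32,
    ) {
        let table_to_partition: BTreeMap<TableId, u32> = table_ids
            .iter()
            .map(|table_id| (*table_id, partition_vnode_count))
            .collect();
        self.group_to_table_vnode_partition
            .write()
            .unwrap()
            .insert(target_group, table_to_partition);
    }
}

fn main() {
    let state = GroupPartitions {
        group_to_table_vnode_partition: RwLock::new(BTreeMap::new()),
    };
    // Hypothetical: table 42 is split out of its parent group into group 1001
    // with 64 vnode partitions (cf. partition_vnode_count in the config).
    state.record_partitions(1001, &[42], 64);
    assert_eq!(
        state.group_to_table_vnode_partition.read().unwrap()[&1001][&42],
        64
    );
}
```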