Skip to content

Commit

Permalink
fix(storage): add disk cache buffer threshold to mitigate OOM (#17595)
Browse files Browse the repository at this point in the history
Signed-off-by: MrCroxx <[email protected]>
  • Loading branch information
MrCroxx authored Jul 9, 2024
1 parent 5cebcad commit 90f45c0
Show file tree
Hide file tree
Showing 6 changed files with 71 additions and 15 deletions.
20 changes: 10 additions & 10 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ license = "Apache-2.0"
repository = "https://github.com/risingwavelabs/risingwave"

[workspace.dependencies]
foyer = { version = "0.10.0", features = ["nightly", "mtrace"] }
foyer = { version = "0.10.1", features = ["nightly", "mtrace"] }
apache-avro = { git = "https://github.com/risingwavelabs/avro", rev = "25113ba88234a9ae23296e981d8302c290fdaa4b", features = [
"snappy",
"zstandard",
Expand Down
30 changes: 30 additions & 0 deletions src/common/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -851,6 +851,9 @@ pub struct FileCacheConfig {
#[serde(default = "default::file_cache::compression")]
pub compression: String,

#[serde(default = "default::file_cache::flush_buffer_threshold_mb")]
pub flush_buffer_threshold_mb: Option<usize>,

#[serde(default, flatten)]
#[config_doc(omitted)]
pub unrecognized: Unrecognized<Self>,
Expand Down Expand Up @@ -1591,6 +1594,14 @@ pub mod default {
pub fn table_info_statistic_history_times() -> usize {
240
}

pub fn block_file_cache_flush_buffer_threshold_mb() -> usize {
256
}

pub fn meta_file_cache_flush_buffer_threshold_mb() -> usize {
64
}
}

pub mod streaming {
Expand Down Expand Up @@ -1651,6 +1662,10 @@ pub mod default {
pub fn compression() -> String {
"none".to_string()
}

pub fn flush_buffer_threshold_mb() -> Option<usize> {
None
}
}

pub mod cache_refill {
Expand Down Expand Up @@ -2123,6 +2138,8 @@ pub struct StorageMemoryConfig {
pub prefetch_buffer_capacity_mb: usize,
pub block_cache_eviction_config: EvictionConfig,
pub meta_cache_eviction_config: EvictionConfig,
pub block_file_cache_flush_buffer_threshold_mb: usize,
pub meta_file_cache_flush_buffer_threshold_mb: usize,
}

pub const MAX_META_CACHE_SHARD_BITS: usize = 4;
Expand Down Expand Up @@ -2234,6 +2251,17 @@ pub fn extract_storage_memory_config(s: &RwConfig) -> StorageMemoryConfig {
}
});

let block_file_cache_flush_buffer_threshold_mb = s
.storage
.data_file_cache
.flush_buffer_threshold_mb
.unwrap_or(default::storage::block_file_cache_flush_buffer_threshold_mb());
let meta_file_cache_flush_buffer_threshold_mb = s
.storage
.meta_file_cache
.flush_buffer_threshold_mb
.unwrap_or(default::storage::block_file_cache_flush_buffer_threshold_mb());

StorageMemoryConfig {
block_cache_capacity_mb,
block_cache_shard_num,
Expand All @@ -2244,6 +2272,8 @@ pub fn extract_storage_memory_config(s: &RwConfig) -> StorageMemoryConfig {
prefetch_buffer_capacity_mb,
block_cache_eviction_config,
meta_cache_eviction_config,
block_file_cache_flush_buffer_threshold_mb,
meta_file_cache_flush_buffer_threshold_mb,
}
}

Expand Down
20 changes: 20 additions & 0 deletions src/compute/src/memory/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,24 @@ pub fn storage_memory_config(
((non_reserved_memory_bytes as f64 * compactor_memory_proportion).ceil() as usize) >> 20,
);

// The file cache flush buffer threshold is used as a emergency limitation.
// On most cases the flush buffer is not supposed to be as large as the threshold.
// So, the file cache flush buffer threshold size is not calculated in the memory usage.
let block_file_cache_flush_buffer_threshold_mb = storage_config
.data_file_cache
.flush_buffer_threshold_mb
.unwrap_or(
risingwave_common::config::default::storage::block_file_cache_flush_buffer_threshold_mb(
),
);
let meta_file_cache_flush_buffer_threshold_mb = storage_config
.meta_file_cache
.flush_buffer_threshold_mb
.unwrap_or(
risingwave_common::config::default::storage::meta_file_cache_flush_buffer_threshold_mb(
),
);

let total_calculated_mb = block_cache_capacity_mb
+ meta_cache_capacity_mb
+ shared_buffer_capacity_mb
Expand Down Expand Up @@ -276,6 +294,8 @@ pub fn storage_memory_config(
prefetch_buffer_capacity_mb,
block_cache_eviction_config,
meta_cache_eviction_config,
block_file_cache_flush_buffer_threshold_mb,
meta_file_cache_flush_buffer_threshold_mb,
}
}

Expand Down
4 changes: 4 additions & 0 deletions src/storage/src/opts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ pub struct StorageOpts {
pub data_file_cache_insert_rate_limit_mb: usize,
pub data_file_cache_indexer_shards: usize,
pub data_file_cache_compression: String,
pub data_file_cache_flush_buffer_threshold_mb: usize,

pub cache_refill_data_refill_levels: Vec<u32>,
pub cache_refill_timeout_ms: u64,
Expand All @@ -108,6 +109,7 @@ pub struct StorageOpts {
pub meta_file_cache_insert_rate_limit_mb: usize,
pub meta_file_cache_indexer_shards: usize,
pub meta_file_cache_compression: String,
pub meta_file_cache_flush_buffer_threshold_mb: usize,

/// The storage url for storing backups.
pub backup_storage_url: String,
Expand Down Expand Up @@ -183,6 +185,7 @@ impl From<(&RwConfig, &SystemParamsReader, &StorageMemoryConfig)> for StorageOpt
data_file_cache_insert_rate_limit_mb: c.storage.data_file_cache.insert_rate_limit_mb,
data_file_cache_indexer_shards: c.storage.data_file_cache.indexer_shards,
data_file_cache_compression: c.storage.data_file_cache.compression.clone(),
data_file_cache_flush_buffer_threshold_mb: s.block_file_cache_flush_buffer_threshold_mb,
meta_file_cache_dir: c.storage.meta_file_cache.dir.clone(),
meta_file_cache_capacity_mb: c.storage.meta_file_cache.capacity_mb,
meta_file_cache_file_capacity_mb: c.storage.meta_file_cache.file_capacity_mb,
Expand All @@ -192,6 +195,7 @@ impl From<(&RwConfig, &SystemParamsReader, &StorageMemoryConfig)> for StorageOpt
meta_file_cache_insert_rate_limit_mb: c.storage.meta_file_cache.insert_rate_limit_mb,
meta_file_cache_indexer_shards: c.storage.meta_file_cache.indexer_shards,
meta_file_cache_compression: c.storage.meta_file_cache.compression.clone(),
meta_file_cache_flush_buffer_threshold_mb: s.meta_file_cache_flush_buffer_threshold_mb,
cache_refill_data_refill_levels: c.storage.cache_refill.data_refill_levels.clone(),
cache_refill_timeout_ms: c.storage.cache_refill.timeout_ms,
cache_refill_concurrency: c.storage.cache_refill.concurrency,
Expand Down
10 changes: 6 additions & 4 deletions src/storage/src/store_impl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,7 @@ pub mod verify {

// TODO: may avoid manual async fn when the bug of rust compiler is fixed. Currently it will
// fail to compile.
#[allow(clippy::manual_async_fn)]
#[expect(clippy::manual_async_fn)]
fn iter(
&self,
key_range: TableKeyRange,
Expand All @@ -303,7 +303,7 @@ pub mod verify {
}
}

#[allow(clippy::manual_async_fn)]
#[expect(clippy::manual_async_fn)]
fn rev_iter(
&self,
key_range: TableKeyRange,
Expand Down Expand Up @@ -421,7 +421,7 @@ pub mod verify {
actual
}

#[allow(clippy::manual_async_fn)]
#[expect(clippy::manual_async_fn)]
fn iter(
&self,
key_range: TableKeyRange,
Expand All @@ -442,7 +442,7 @@ pub mod verify {
}
}

#[allow(clippy::manual_async_fn)]
#[expect(clippy::manual_async_fn)]
fn rev_iter(
&self,
key_range: TableKeyRange,
Expand Down Expand Up @@ -644,6 +644,7 @@ impl StateStoreImpl {
.with_indexer_shards(opts.meta_file_cache_indexer_shards)
.with_flushers(opts.meta_file_cache_flushers)
.with_reclaimers(opts.meta_file_cache_reclaimers)
.with_buffer_threshold(opts.meta_file_cache_flush_buffer_threshold_mb * MB) // 128 MiB
.with_clean_region_threshold(
opts.meta_file_cache_reclaimers + opts.meta_file_cache_reclaimers / 2,
)
Expand Down Expand Up @@ -696,6 +697,7 @@ impl StateStoreImpl {
.with_indexer_shards(opts.data_file_cache_indexer_shards)
.with_flushers(opts.data_file_cache_flushers)
.with_reclaimers(opts.data_file_cache_reclaimers)
.with_buffer_threshold(opts.data_file_cache_flush_buffer_threshold_mb * MB) // 128 MiB
.with_clean_region_threshold(
opts.data_file_cache_reclaimers + opts.data_file_cache_reclaimers / 2,
)
Expand Down

0 comments on commit 90f45c0

Please sign in to comment.