Skip to content

Commit

Permalink
refactor: refactor file cache metrics build, bump foyer (#12121)
Browse files (browse the repository at this point in the history)
Signed-off-by: MrCroxx <[email protected]>
  • Loading branch information
MrCroxx authored Sep 6, 2023
1 parent da89875 commit 52192e6
Show file tree
Hide file tree
Showing 9 changed files with 54 additions and 80 deletions.
39 changes: 28 additions & 11 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion docker/dashboards/risingwave-dev-dashboard.json

Large diffs are not rendered by default.

61 changes: 12 additions & 49 deletions grafana/risingwave-dev-dashboard.dashboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -1691,16 +1691,6 @@ def section_hummock(panels):
),
],
),
panels.timeseries_ops(
"File Cache Ops",
"",
[
panels.target(
f"sum(rate({metric('foyer_storage_latency_count')}[$__rate_interval])) by (op, extra, instance)",
"file cache {{op}} {{extra}} @ {{instance}}",
),
],
),
panels.timeseries_ops(
"Read Ops",
"",
Expand Down Expand Up @@ -1918,10 +1908,6 @@ def section_hummock(panels):
f"(sum(rate({table_metric('state_store_sst_store_block_request_counts', data_miss_filter)}[$__rate_interval])) by (job,instance,table_id)) / (sum(rate({table_metric('state_store_sst_store_block_request_counts', data_total_filter)}[$__rate_interval])) by (job,instance,table_id))",
"block cache miss rate - {{table_id}} @ {{job}} @ {{instance}}",
),
panels.target(
f"(sum(rate({metric('file_cache_miss')}[$__rate_interval])) by (instance)) / (sum(rate({metric('file_cache_latency_count', file_cache_get_filter)}[$__rate_interval])) by (instance))",
"file cache miss rate @ {{instance}}",
),
],
),
panels.timeseries_percentage(
Expand Down Expand Up @@ -2218,32 +2204,20 @@ def section_hummock_tiered_cache(outer_panels):
"",
[
panels.target(
f"sum(rate({metric('data_foyer_storage_latency_count')}[$__rate_interval])) by (op, extra, instance)",
"data file cache {{op}} {{extra}} @ {{instance}}",
),
panels.target(
f"sum(rate({metric('meta_foyer_storage_latency_count')}[$__rate_interval])) by (op, extra, instance)",
"meta cache {{op}} {{extra}} @ {{instance}}",
f"sum(rate({metric('foyer_storage_op_duration_count')}[$__rate_interval])) by (foyer, op, extra, instance)",
"{{foyer}} file cache {{op}} {{extra}} @ {{instance}}",
),
],
),
panels.timeseries_latency(
"Latency",
"Duration",
"",
[
*quantile(
lambda quantile, legend: panels.target(
f"histogram_quantile({quantile}, sum(rate({metric('data_foyer_storage_latency_bucket')}[$__rate_interval])) by (le, op, extra, instance))",
f"p{legend} - data file cache" +
" - {{op}} {{extra}} @ {{instance}}",
),
[50, 90, 99, "max"],
),
*quantile(
lambda quantile, legend: panels.target(
f"histogram_quantile({quantile}, sum(rate({metric('meta_foyer_storage_latency_bucket')}[$__rate_interval])) by (le, op, extra, instance))",
f"p{legend} - meta file cache" +
" - {{op}} {{extra}} @ {{instance}}",
f"histogram_quantile({quantile}, sum(rate({metric('foyer_storage_op_duration_bucket')}[$__rate_interval])) by (le, foyer, op, extra, instance))",
f"p{legend}" +
" - {{foyer}} file cache - {{op}} {{extra}} @ {{instance}}",
),
[50, 90, 99, "max"],
),
Expand All @@ -2254,12 +2228,8 @@ def section_hummock_tiered_cache(outer_panels):
"",
[
panels.target(
f"sum(rate({metric('data_foyer_storage_bytes')}[$__rate_interval])) by (op, extra, instance)",
"data file cache - {{op}} {{extra}} @ {{instance}}",
),
panels.target(
f"sum(rate({metric('meta_foyer_storage_bytes')}[$__rate_interval])) by (op, extra, instance)",
"meta file cache - {{op}} {{extra}} @ {{instance}}",
f"sum(rate({metric('foyer_storage_op_bytes')}[$__rate_interval])) by (foyer, op, extra, instance)",
"{{foyer}} file cache - {{op}} {{extra}} @ {{instance}}",
),
],
),
Expand All @@ -2268,10 +2238,7 @@ def section_hummock_tiered_cache(outer_panels):
"",
[
panels.target(
f"{metric('data_foyer_storage_size')}", "size @ {{instance}}"
),
panels.target(
f"{metric('meta_foyer_storage_size')}", "size @ {{instance}}"
f"sum({metric('foyer_storage_total_bytes')}) by (foyer, instance)", "{{foyer}} size @ {{instance}}"
),
],
),
Expand All @@ -2280,12 +2247,8 @@ def section_hummock_tiered_cache(outer_panels):
"",
[
panels.target(
f"sum(rate({metric('data_foyer_storage_latency_count', file_cache_hit_filter)}[$__rate_interval])) by (instance) / (sum(rate({metric('data_foyer_storage_latency_count', file_cache_hit_filter)}[$__rate_interval])) by (instance) + sum(rate({metric('data_foyer_storage_latency_count', file_cache_miss_filter)}[$__rate_interval])) by (instance))",
"data file cache hit ratio @ {{instance}}",
),
panels.target(
f"sum(rate({metric('meta_foyer_storage_latency_count', file_cache_hit_filter)}[$__rate_interval])) by (instance) / (sum(rate({metric('meta_foyer_storage_latency_count', file_cache_hit_filter)}[$__rate_interval])) by (instance) + sum(rate({metric('meta_foyer_storage_latency_count', file_cache_miss_filter)}[$__rate_interval])) by (instance))",
"meta file cache hit ratio @ {{instance}}",
f"sum(rate({metric('foyer_storage_op_duration_count', file_cache_hit_filter)}[$__rate_interval])) by (foyer, instance) / (sum(rate({metric('foyer_storage_op_duration_count', file_cache_hit_filter)}[$__rate_interval])) by (foyer, instance) + sum(rate({metric('foyer_storage_op_duration_count', file_cache_miss_filter)}[$__rate_interval])) by (foyer, instance))",
"{{foyer}} file cache hit ratio @ {{instance}}",
),
],
),
Expand All @@ -2294,7 +2257,7 @@ def section_hummock_tiered_cache(outer_panels):
"",
[
panels.target(
f"sum(refill_queue_length) by (instance)",
f"sum(refill_queue_total) by (instance)",
"refill queue length @ {{instance}}",
),
],
Expand Down
2 changes: 1 addition & 1 deletion grafana/risingwave-dev-dashboard.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/storage/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ dyn-clone = "1.0.13"
either = "1"
enum-as-inner = "0.6"
fail = "0.5"
foyer = { git = "https://github.com/mrcroxx/foyer", rev = "99b21df" }
foyer = { git = "https://github.com/mrcroxx/foyer", rev = "2b8907c" }
futures = { version = "0.3", default-features = false, features = ["alloc"] }
futures-async-stream = { workspace = true }
hex = "0.4"
Expand Down
2 changes: 1 addition & 1 deletion src/storage/src/hummock/event_handler/refiller.rs
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ impl CacheRefillTask {
let holders = match Self::meta_cache_refill(&context, delta).await {
Ok(holders) => holders,
Err(e) => {
tracing::warn!("meeta cache refill error: {:?}", e);
tracing::warn!("meta cache refill error: {:?}", e);
return;
}
};
Expand Down
12 changes: 4 additions & 8 deletions src/storage/src/hummock/file_cache/store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@ use foyer::common::code::{Key, Value};
use foyer::storage::admission::rated_random::RatedRandomAdmissionPolicy;
use foyer::storage::admission::AdmissionPolicy;
use foyer::storage::event::EventListener;
use foyer::storage::store::{FetchValueFuture, PrometheusConfig};
pub use foyer::storage::metrics::set_metrics_registry as set_foyer_metrics_registry;
use foyer::storage::store::FetchValueFuture;
use foyer::storage::LfuFsStoreConfig;
use prometheus::Registry;
use risingwave_common::util::runtime::BackgroundShutdownRuntime;
use risingwave_hummock_sdk::HummockSstableObjectId;

Expand Down Expand Up @@ -59,6 +59,7 @@ where
K: Key,
V: Value,
{
pub name: String,
pub dir: PathBuf,
pub capacity: usize,
pub file_capacity: usize,
Expand All @@ -73,8 +74,6 @@ where
pub lfu_window_to_cache_size_ratio: usize,
pub lfu_tiny_lru_capacity_ratio: f64,
pub rated_random_rate: usize,
pub prometheus_registry: Option<Registry>,
pub prometheus_namespace: Option<String>,
pub event_listener: Vec<Arc<dyn EventListener<K = K, V = V>>>,
pub enable_filter: bool,
}
Expand Down Expand Up @@ -201,6 +200,7 @@ where
}

let c = LfuFsStoreConfig {
name: foyer_store_config.name,
eviction_config: EvictionConfig {
window_to_cache_size_ratio: foyer_store_config
.lfu_window_to_cache_size_ratio,
Expand All @@ -222,10 +222,6 @@ where
reclaim_rate_limit: foyer_store_config.reclaim_rate_limit,
recover_concurrency: foyer_store_config.recover_concurrency,
event_listeners: foyer_store_config.event_listener,
prometheus_config: PrometheusConfig {
registry: foyer_store_config.prometheus_registry,
namespace: foyer_store_config.prometheus_namespace,
},
clean_region_threshold: foyer_store_config.reclaimers
+ foyer_store_config.reclaimers / 2,
};
Expand Down
12 changes: 6 additions & 6 deletions src/storage/src/store_impl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ use crate::hummock::backup_reader::BackupReaderRef;
use crate::hummock::hummock_meta_client::MonitoredHummockMetaClient;
use crate::hummock::sstable_store::SstableStoreRef;
use crate::hummock::{
FileCache, FoyerRuntimeConfig, FoyerStoreConfig, HummockError, HummockStorage, MemoryLimiter,
SstableObjectIdManagerRef, SstableStore,
set_foyer_metrics_registry, FileCache, FoyerRuntimeConfig, FoyerStoreConfig, HummockError,
HummockStorage, MemoryLimiter, SstableObjectIdManagerRef, SstableStore,
};
use crate::memory::sled::SledStateStore;
use crate::memory::MemoryStateStore;
Expand Down Expand Up @@ -542,12 +542,15 @@ impl StateStoreImpl {
storage_metrics: Arc<MonitoredStorageMetrics>,
compactor_metrics: Arc<CompactorMetrics>,
) -> StorageResult<Self> {
set_foyer_metrics_registry(GLOBAL_METRICS_REGISTRY.clone());

let data_file_cache = if opts.data_file_cache_dir.is_empty() {
FileCache::none()
} else {
const MB: usize = 1024 * 1024;

let foyer_store_config = FoyerStoreConfig {
name: "data".to_string(),
dir: PathBuf::from(opts.data_file_cache_dir.clone()),
capacity: opts.data_file_cache_capacity_mb * MB,
file_capacity: opts.data_file_cache_file_capacity_mb * MB,
Expand All @@ -563,8 +566,6 @@ impl StateStoreImpl {
reclaim_rate_limit: opts.data_file_cache_reclaim_rate_limit_mb * MB,
recover_concurrency: opts.data_file_cache_recover_concurrency,
event_listener: vec![],
prometheus_registry: Some(GLOBAL_METRICS_REGISTRY.clone()),
prometheus_namespace: Some("data".to_string()),
enable_filter: !opts.cache_refill_data_refill_levels.is_empty(),
};
let config = FoyerRuntimeConfig {
Expand All @@ -582,6 +583,7 @@ impl StateStoreImpl {
const MB: usize = 1024 * 1024;

let foyer_store_config = FoyerStoreConfig {
name: "meta".to_string(),
dir: PathBuf::from(opts.meta_file_cache_dir.clone()),
capacity: opts.meta_file_cache_capacity_mb * MB,
file_capacity: opts.meta_file_cache_file_capacity_mb * MB,
Expand All @@ -597,8 +599,6 @@ impl StateStoreImpl {
reclaim_rate_limit: opts.meta_file_cache_reclaim_rate_limit_mb * MB,
recover_concurrency: opts.meta_file_cache_recover_concurrency,
event_listener: vec![],
prometheus_registry: Some(GLOBAL_METRICS_REGISTRY.clone()),
prometheus_namespace: Some("meta".to_string()),
enable_filter: false,
};
let config = FoyerRuntimeConfig {
Expand Down
2 changes: 0 additions & 2 deletions src/workspace-hack/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,6 @@ memchr = { version = "2" }
miniz_oxide = { version = "0.7", default-features = false, features = ["with-alloc"] }
mio = { version = "0.8", features = ["net", "os-ext"] }
multimap = { version = "0.8" }
nix = { version = "0.26" }
nom = { version = "7" }
num-bigint = { version = "0.4" }
num-integer = { version = "0.1", features = ["i128"] }
Expand Down Expand Up @@ -160,7 +159,6 @@ memchr = { version = "2" }
miniz_oxide = { version = "0.7", default-features = false, features = ["with-alloc"] }
mio = { version = "0.8", features = ["net", "os-ext"] }
multimap = { version = "0.8" }
nix = { version = "0.26" }
nom = { version = "7" }
num-bigint = { version = "0.4" }
num-integer = { version = "0.1", features = ["i128"] }
Expand Down

0 comments on commit 52192e6

Please sign in to comment.