Skip to content

Commit

Permalink
feat: auto heap dump by default if MALLOC_CONF=prof:true (#12186)
Browse files Browse the repository at this point in the history
  • Loading branch information
fuyufjh authored and stdrc committed Sep 11, 2023
1 parent e7b885e commit 8d1980e
Show file tree
Hide file tree
Showing 11 changed files with 69 additions and 35 deletions.
6 changes: 3 additions & 3 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion src/batch/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ rand = "0.8"
tempfile = "3"

[target.'cfg(unix)'.dev-dependencies]
tikv-jemallocator = { git = "https://github.com/yuhao-su/jemallocator.git", rev = "a0911601bb7bb263ca55c7ea161ef308fdc623f8" }
tikv-jemallocator = { git = "https://github.com/risingwavelabs/jemallocator.git", rev = "b7f9f3" }

[[bench]]
name = "filter"
Expand Down
6 changes: 5 additions & 1 deletion src/cmd/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,11 @@ workspace-hack = { path = "../workspace-hack" }
task_stats_alloc = { path = "../utils/task_stats_alloc" }

[target.'cfg(unix)'.dependencies]
tikv-jemallocator = { git = "https://github.com/yuhao-su/jemallocator.git", features = ["profiling", "stats", "unprefixed_malloc_on_supported_platforms"], rev = "a0911601bb7bb263ca55c7ea161ef308fdc623f8" }
tikv-jemallocator = { git = "https://github.com/risingwavelabs/jemallocator.git", features = [
"profiling",
"stats",
"unprefixed_malloc_on_supported_platforms",
], rev = "b7f9f3" }

[[bin]]
name = "frontend"
Expand Down
6 changes: 5 additions & 1 deletion src/cmd_all/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,11 @@ vergen = { version = "8", default-features = false, features = ["build", "git",
task_stats_alloc = { path = "../utils/task_stats_alloc" }

[target.'cfg(unix)'.dependencies]
tikv-jemallocator = { git = "https://github.com/yuhao-su/jemallocator.git", features = ["profiling", "stats", "unprefixed_malloc_on_supported_platforms"], rev = "a0911601bb7bb263ca55c7ea161ef308fdc623f8" }
tikv-jemallocator = { git = "https://github.com/risingwavelabs/jemallocator.git", features = [
"profiling",
"stats",
"unprefixed_malloc_on_supported_platforms",
], rev = "b7f9f3" }

[[bin]]
name = "risingwave"
Expand Down
25 changes: 13 additions & 12 deletions src/common/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -370,7 +370,7 @@ pub struct ServerConfig {
pub unrecognized: Unrecognized<Self>,

/// Enable heap profile dump when memory usage is high.
#[serde(default = "default::server::auto_dump_heap_profile")]
#[serde(default)]
pub auto_dump_heap_profile: AutoDumpHeapProfileConfig,
}

Expand Down Expand Up @@ -658,18 +658,19 @@ impl AsyncStackTraceOption {

#[derive(Clone, Debug, Serialize, Deserialize, DefaultFromSerde)]
pub struct AutoDumpHeapProfileConfig {
/// Enable to auto dump heap profile when memory usage is high
#[serde(default = "default::auto_dump_heap_profile::enabled")]
pub enabled: bool,

/// The directory to dump heap profile. If empty, the prefix in `MALLOC_CONF` will be used
#[serde(default = "default::auto_dump_heap_profile::dir")]
pub dir: String,

/// The proportion (number between 0 and 1) of memory usage to trigger heap profile dump
#[serde(default = "default::auto_dump_heap_profile::threshold")]
pub threshold: f32,
}

impl AutoDumpHeapProfileConfig {
pub fn enabled(&self) -> bool {
!self.dir.is_empty()
}
}

serde_with::with_prefix!(streaming_prefix "stream_");
serde_with::with_prefix!(batch_prefix "batch_");

Expand Down Expand Up @@ -907,7 +908,7 @@ pub mod default {
}

pub mod server {
use crate::config::{AutoDumpHeapProfileConfig, MetricLevel};
use crate::config::MetricLevel;

pub fn heartbeat_interval_ms() -> u32 {
1000
Expand All @@ -924,10 +925,6 @@ pub mod default {
pub fn telemetry_enabled() -> bool {
true
}

pub fn auto_dump_heap_profile() -> AutoDumpHeapProfileConfig {
Default::default()
}
}

pub mod storage {
Expand Down Expand Up @@ -1129,6 +1126,10 @@ pub mod default {
}

pub mod auto_dump_heap_profile {
pub fn enabled() -> bool {
true
}

pub fn dir() -> String {
"".to_string()
}
Expand Down
2 changes: 1 addition & 1 deletion src/compute/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ tower = { version = "0.4", features = ["util", "load-shed"] }
tracing = "0.1"

[target.'cfg(target_os = "linux")'.dependencies]
tikv-jemalloc-ctl = { git = "https://github.com/yuhao-su/jemallocator.git", rev = "a0911601bb7bb263ca55c7ea161ef308fdc623f8" }
tikv-jemalloc-ctl = { git = "https://github.com/risingwavelabs/jemallocator.git", rev = "b7f9f3" }
[target.'cfg(not(madsim))'.dependencies]
workspace-hack = { path = "../workspace-hack" }

Expand Down
2 changes: 1 addition & 1 deletion src/compute/src/memory_management/memory_manager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ impl GlobalMemoryManager {
.unwrap();
tracing::info!("memory control policy: {:?}", &memory_control_policy);

if auto_dump_heap_profile_config.enabled() {
if auto_dump_heap_profile_config.enabled {
fs::create_dir_all(&auto_dump_heap_profile_config.dir).unwrap();
}
Arc::new(Self {
Expand Down
15 changes: 8 additions & 7 deletions src/compute/src/memory_management/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,15 +74,8 @@ pub fn build_memory_control_policy(
total_memory_bytes: usize,
auto_dump_heap_profile_config: AutoDumpHeapProfileConfig,
) -> Result<MemoryControlRef> {
use risingwave_common::bail;
use tikv_jemalloc_ctl::opt;

use self::policy::JemallocMemoryControl;

if !opt::prof::read().unwrap() && auto_dump_heap_profile_config.enabled() {
bail!("Auto heap profile dump should not be enabled with Jemalloc profile disable");
}

Ok(Box::new(JemallocMemoryControl::new(
total_memory_bytes,
auto_dump_heap_profile_config,
Expand Down Expand Up @@ -122,6 +115,14 @@ impl MemoryControl for DummyPolicy {
/// overhead, network buffer, etc. based on `SYSTEM_RESERVED_MEMORY_PROPORTION`. The reserve memory
/// size must be larger than `MIN_SYSTEM_RESERVED_MEMORY_MB`
pub fn reserve_memory_bytes(total_memory_bytes: usize) -> (usize, usize) {
if total_memory_bytes < MIN_COMPUTE_MEMORY_MB << 20 {
panic!(
"The total memory size ({}) is too small. It must be at least {} MB.",
convert(total_memory_bytes as _),
MIN_COMPUTE_MEMORY_MB
);
}

let reserved = std::cmp::max(
(total_memory_bytes as f64 * SYSTEM_RESERVED_MEMORY_PROPORTION).ceil() as usize,
MIN_SYSTEM_RESERVED_MEMORY_MB << 20,
Expand Down
35 changes: 28 additions & 7 deletions src/compute/src/memory_management/policy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@ use risingwave_batch::task::BatchManager;
use risingwave_common::config::AutoDumpHeapProfileConfig;
use risingwave_common::util::epoch::Epoch;
use risingwave_stream::task::LocalStreamManager;
use tikv_jemalloc_ctl::{epoch as jemalloc_epoch, prof as jemalloc_prof, stats as jemalloc_stats};
use tikv_jemalloc_ctl::{
epoch as jemalloc_epoch, opt as jemalloc_opt, prof as jemalloc_prof, stats as jemalloc_stats,
};

use super::{MemoryControl, MemoryControlStats};

Expand Down Expand Up @@ -100,22 +102,39 @@ impl JemallocMemoryControl {
}

fn dump_heap_prof(&self, cur_used_memory_bytes: usize, prev_used_memory_bytes: usize) {
if !self.auto_dump_heap_profile_config.enabled() {
if !self.auto_dump_heap_profile_config.enabled {
return;
}

if cur_used_memory_bytes > self.threshold_auto_dump_heap_profile
&& prev_used_memory_bytes <= self.threshold_auto_dump_heap_profile
{
let opt_prof = jemalloc_opt::prof::read().unwrap();
if !opt_prof {
tracing::info!("Cannot dump heap profile because Jemalloc prof is not enabled");
return;
}

let time_prefix = chrono::Local::now().format("%Y-%m-%d-%H-%M-%S").to_string();
let file_name = format!(
"{}.exceed-threshold-aggressive-heap-prof.compute.dump.{}\0",
time_prefix, self.dump_seq,
);
let file_path = Path::new(&self.auto_dump_heap_profile_config.dir)
.join(Path::new(&file_name))
.to_str()
.unwrap()
.to_string();

let file_path = if !self.auto_dump_heap_profile_config.dir.is_empty() {
Path::new(&self.auto_dump_heap_profile_config.dir)
.join(Path::new(&file_name))
.to_str()
.unwrap()
.to_string()
} else {
let prof_prefix_mib = jemalloc_prof::prefix::mib().unwrap();
let prof_prefix = prof_prefix_mib.read().unwrap();
let mut file_path = prof_prefix.to_string_lossy().to_string();
file_path.push_str(&file_name);
file_path
};

let file_path_str = Box::leak(file_path.into_boxed_str());
let file_path_bytes = unsafe { file_path_str.as_bytes_mut() };
let file_path_ptr = file_path_bytes.as_mut_ptr();
Expand All @@ -124,6 +143,8 @@ impl JemallocMemoryControl {
.write(CStr::from_bytes_with_nul(file_path_bytes).unwrap())
{
tracing::warn!("Auto Jemalloc dump heap file failed! {:?}", e);
} else {
tracing::info!("Successfully dumped heap profile to {}", file_name);
}
let _ = unsafe { Box::from_raw(file_path_ptr) };
}
Expand Down
1 change: 1 addition & 0 deletions src/config/example.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ metrics_level = "Info"
telemetry_enabled = true

[server.auto_dump_heap_profile]
enabled = true
dir = ""
threshold = 0.8999999761581421

Expand Down
4 changes: 3 additions & 1 deletion src/tests/simulation/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,9 @@ serde_derive = "1.0.183"
serde_json = "1.0.105"
sqllogictest = "0.15.2"
tempfile = "3"
tikv-jemallocator = { git = "https://github.com/yuhao-su/jemallocator.git", features = ["profiling"], rev = "a0911601bb7bb263ca55c7ea161ef308fdc623f8" }
tikv-jemallocator = { git = "https://github.com/risingwavelabs/jemallocator.git", features = [
"profiling",
], rev = "b7f9f3" }
tokio = { version = "0.2.23", package = "madsim-tokio" }
tokio-postgres = "0.7"
tracing = "0.1"
Expand Down

0 comments on commit 8d1980e

Please sign in to comment.