Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: auto heap dump by default if MALLOC_CONF=prof:true #12186

Merged
merged 4 commits into from
Sep 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion src/batch/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ rand = "0.8"
tempfile = "3"

[target.'cfg(unix)'.dev-dependencies]
tikv-jemallocator = { git = "https://github.com/yuhao-su/jemallocator.git", rev = "a0911601bb7bb263ca55c7ea161ef308fdc623f8" }
tikv-jemallocator = { git = "https://github.com/risingwavelabs/jemallocator.git", rev = "b7f9f3" }

[[bench]]
name = "filter"
Expand Down
6 changes: 5 additions & 1 deletion src/cmd/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,11 @@ workspace-hack = { path = "../workspace-hack" }
task_stats_alloc = { path = "../utils/task_stats_alloc" }

[target.'cfg(unix)'.dependencies]
tikv-jemallocator = { git = "https://github.com/yuhao-su/jemallocator.git", features = ["profiling", "stats", "unprefixed_malloc_on_supported_platforms"], rev = "a0911601bb7bb263ca55c7ea161ef308fdc623f8" }
tikv-jemallocator = { git = "https://github.com/risingwavelabs/jemallocator.git", features = [
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about making it a workspace dependency?

[workspace.dependencies]

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's do that in a future PR. cc @yuhao-su

"profiling",
"stats",
"unprefixed_malloc_on_supported_platforms",
], rev = "b7f9f3" }

[[bin]]
name = "frontend"
Expand Down
6 changes: 5 additions & 1 deletion src/cmd_all/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,11 @@ vergen = { version = "8", default-features = false, features = ["build", "git",
task_stats_alloc = { path = "../utils/task_stats_alloc" }

[target.'cfg(unix)'.dependencies]
tikv-jemallocator = { git = "https://github.com/yuhao-su/jemallocator.git", features = ["profiling", "stats", "unprefixed_malloc_on_supported_platforms"], rev = "a0911601bb7bb263ca55c7ea161ef308fdc623f8" }
tikv-jemallocator = { git = "https://github.com/risingwavelabs/jemallocator.git", features = [
"profiling",
"stats",
"unprefixed_malloc_on_supported_platforms",
], rev = "b7f9f3" }

[[bin]]
name = "risingwave"
Expand Down
25 changes: 13 additions & 12 deletions src/common/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -370,7 +370,7 @@ pub struct ServerConfig {
pub unrecognized: Unrecognized<Self>,

/// Enable heap profile dump when memory usage is high.
#[serde(default = "default::server::auto_dump_heap_profile")]
#[serde(default)]
pub auto_dump_heap_profile: AutoDumpHeapProfileConfig,
}

Expand Down Expand Up @@ -658,18 +658,19 @@ impl AsyncStackTraceOption {

/// Settings for automatically dumping a heap profile when memory usage is high.
#[derive(Clone, Debug, Serialize, Deserialize, DefaultFromSerde)]
pub struct AutoDumpHeapProfileConfig {
/// Whether to automatically dump a heap profile when memory usage is high.
#[serde(default = "default::auto_dump_heap_profile::enabled")]
pub enabled: bool,

/// The directory to dump heap profiles into. If empty, the prefix configured in `MALLOC_CONF`
/// is used instead.
#[serde(default = "default::auto_dump_heap_profile::dir")]
pub dir: String,

/// The proportion of memory usage (a number between 0 and 1) that triggers a heap profile dump.
#[serde(default = "default::auto_dump_heap_profile::threshold")]
pub threshold: f32,
}

impl AutoDumpHeapProfileConfig {
pub fn enabled(&self) -> bool {
!self.dir.is_empty()
}
}

serde_with::with_prefix!(streaming_prefix "stream_");
serde_with::with_prefix!(batch_prefix "batch_");

Expand Down Expand Up @@ -908,7 +909,7 @@ pub mod default {
}

pub mod server {
use crate::config::{AutoDumpHeapProfileConfig, MetricLevel};
use crate::config::MetricLevel;

pub fn heartbeat_interval_ms() -> u32 {
1000
Expand All @@ -925,10 +926,6 @@ pub mod default {
pub fn telemetry_enabled() -> bool {
true
}

pub fn auto_dump_heap_profile() -> AutoDumpHeapProfileConfig {
Default::default()
}
}

pub mod storage {
Expand Down Expand Up @@ -1130,6 +1127,10 @@ pub mod default {
}

pub mod auto_dump_heap_profile {
pub fn enabled() -> bool {
true
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Where will we read the env var MALLOC_CONF? 👀

Copy link
Member Author

@fuyufjh fuyufjh Sep 11, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here: https://github.com/risingwavelabs/risingwave/pull/12186/files#diff-22db760b18e3eab5a13a58cf0ef5249aa2defa83c011f73bf72e41e58bb8363bR131-R132

let prof_prefix_mib = jemalloc_prof::prefix::mib().unwrap();
let prof_prefix = prof_prefix_mib.read().unwrap();

We don't read the env var directly; the value is read back from the jemalloc library as one of its options.

Note that the auto-dump is enabled by default, but if jemalloc's profiling is disabled (which is also the default), nothing happens except the log line mentioned in the PR description.

}

pub fn dir() -> String {
"".to_string()
}
Expand Down
2 changes: 1 addition & 1 deletion src/compute/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ tower = { version = "0.4", features = ["util", "load-shed"] }
tracing = "0.1"

[target.'cfg(target_os = "linux")'.dependencies]
tikv-jemalloc-ctl = { git = "https://github.com/yuhao-su/jemallocator.git", rev = "a0911601bb7bb263ca55c7ea161ef308fdc623f8" }
tikv-jemalloc-ctl = { git = "https://github.com/risingwavelabs/jemallocator.git", rev = "b7f9f3" }
[target.'cfg(not(madsim))'.dependencies]
workspace-hack = { path = "../workspace-hack" }

Expand Down
2 changes: 1 addition & 1 deletion src/compute/src/memory_management/memory_manager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ impl GlobalMemoryManager {
.unwrap();
tracing::info!("memory control policy: {:?}", &memory_control_policy);

if auto_dump_heap_profile_config.enabled() {
if auto_dump_heap_profile_config.enabled {
fs::create_dir_all(&auto_dump_heap_profile_config.dir).unwrap();
}
Arc::new(Self {
Expand Down
15 changes: 8 additions & 7 deletions src/compute/src/memory_management/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,15 +74,8 @@ pub fn build_memory_control_policy(
total_memory_bytes: usize,
auto_dump_heap_profile_config: AutoDumpHeapProfileConfig,
) -> Result<MemoryControlRef> {
use risingwave_common::bail;
use tikv_jemalloc_ctl::opt;

use self::policy::JemallocMemoryControl;

if !opt::prof::read().unwrap() && auto_dump_heap_profile_config.enabled() {
bail!("Auto heap profile dump should not be enabled with Jemalloc profile disable");
}

Ok(Box::new(JemallocMemoryControl::new(
total_memory_bytes,
auto_dump_heap_profile_config,
Expand Down Expand Up @@ -122,6 +115,14 @@ impl MemoryControl for DummyPolicy {
/// overhead, network buffer, etc. based on `SYSTEM_RESERVED_MEMORY_PROPORTION`. The reserve memory
/// size must be larger than `MIN_SYSTEM_RESERVED_MEMORY_MB`
pub fn reserve_memory_bytes(total_memory_bytes: usize) -> (usize, usize) {
if total_memory_bytes < MIN_COMPUTE_MEMORY_MB << 20 {
panic!(
"The total memory size ({}) is too small. It must be at least {} MB.",
convert(total_memory_bytes as _),
MIN_COMPUTE_MEMORY_MB
);
}
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know why `MIN_COMPUTE_MEMORY_MB` was not checked anywhere, so I added the check while I was here.


let reserved = std::cmp::max(
(total_memory_bytes as f64 * SYSTEM_RESERVED_MEMORY_PROPORTION).ceil() as usize,
MIN_SYSTEM_RESERVED_MEMORY_MB << 20,
Expand Down
35 changes: 28 additions & 7 deletions src/compute/src/memory_management/policy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@ use risingwave_batch::task::BatchManager;
use risingwave_common::config::AutoDumpHeapProfileConfig;
use risingwave_common::util::epoch::Epoch;
use risingwave_stream::task::LocalStreamManager;
use tikv_jemalloc_ctl::{epoch as jemalloc_epoch, prof as jemalloc_prof, stats as jemalloc_stats};
use tikv_jemalloc_ctl::{
epoch as jemalloc_epoch, opt as jemalloc_opt, prof as jemalloc_prof, stats as jemalloc_stats,
};

use super::{MemoryControl, MemoryControlStats};

Expand Down Expand Up @@ -100,22 +102,39 @@ impl JemallocMemoryControl {
}

fn dump_heap_prof(&self, cur_used_memory_bytes: usize, prev_used_memory_bytes: usize) {
if !self.auto_dump_heap_profile_config.enabled() {
if !self.auto_dump_heap_profile_config.enabled {
return;
}

if cur_used_memory_bytes > self.threshold_auto_dump_heap_profile
&& prev_used_memory_bytes <= self.threshold_auto_dump_heap_profile
{
let opt_prof = jemalloc_opt::prof::read().unwrap();
if !opt_prof {
tracing::info!("Cannot dump heap profile because Jemalloc prof is not enabled");
return;
}
yuhao-su marked this conversation as resolved.
Show resolved Hide resolved

let time_prefix = chrono::Local::now().format("%Y-%m-%d-%H-%M-%S").to_string();
let file_name = format!(
"{}.exceed-threshold-aggressive-heap-prof.compute.dump.{}\0",
time_prefix, self.dump_seq,
);
let file_path = Path::new(&self.auto_dump_heap_profile_config.dir)
.join(Path::new(&file_name))
.to_str()
.unwrap()
.to_string();

let file_path = if !self.auto_dump_heap_profile_config.dir.is_empty() {
Path::new(&self.auto_dump_heap_profile_config.dir)
.join(Path::new(&file_name))
.to_str()
.unwrap()
.to_string()
} else {
let prof_prefix_mib = jemalloc_prof::prefix::mib().unwrap();
let prof_prefix = prof_prefix_mib.read().unwrap();
let mut file_path = prof_prefix.to_string_lossy().to_string();
file_path.push_str(&file_name);
file_path
};

let file_path_str = Box::leak(file_path.into_boxed_str());
let file_path_bytes = unsafe { file_path_str.as_bytes_mut() };
let file_path_ptr = file_path_bytes.as_mut_ptr();
Expand All @@ -124,6 +143,8 @@ impl JemallocMemoryControl {
.write(CStr::from_bytes_with_nul(file_path_bytes).unwrap())
{
tracing::warn!("Auto Jemalloc dump heap file failed! {:?}", e);
} else {
tracing::info!("Successfully dumped heap profile to {}", file_name);
}
unsafe { Box::from_raw(file_path_ptr) };
}
Expand Down
1 change: 1 addition & 0 deletions src/config/example.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ metrics_level = "Info"
telemetry_enabled = true

[server.auto_dump_heap_profile]
enabled = true
dir = ""
threshold = 0.8999999761581421

Expand Down
4 changes: 3 additions & 1 deletion src/tests/simulation/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,9 @@ serde_derive = "1.0.183"
serde_json = "1.0.105"
sqllogictest = "0.15.2"
tempfile = "3"
tikv-jemallocator = { git = "https://github.com/yuhao-su/jemallocator.git", features = ["profiling"], rev = "a0911601bb7bb263ca55c7ea161ef308fdc623f8" }
tikv-jemallocator = { git = "https://github.com/risingwavelabs/jemallocator.git", features = [
"profiling",
], rev = "b7f9f3" }
tokio = { version = "0.2.23", package = "madsim-tokio" }
tokio-postgres = "0.7"
tracing = "0.1"
Expand Down