Skip to content

Commit

Permalink
feat(compute): give more batch memory for serving node (#18365) (#18367)
Browse files Browse the repository at this point in the history
Co-authored-by: Dylan <[email protected]>
  • Loading branch information
github-actions[bot] and chenzl25 authored Sep 3, 2024
1 parent dc8faba commit 92e45bb
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 4 deletions.
11 changes: 8 additions & 3 deletions src/compute/src/memory/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,8 @@ const STORAGE_META_CACHE_MEMORY_PROPORTION: f64 = 0.35;
const STORAGE_SHARED_BUFFER_MEMORY_PROPORTION: f64 = 0.3;

/// The proportion of compute memory used for batch processing.
const COMPUTE_BATCH_MEMORY_PROPORTION: f64 = 0.3;
const COMPUTE_BATCH_MEMORY_PROPORTION_FOR_STREAMING: f64 = 0.3;
/// The proportion of compute memory used for batch processing when the node is a serving node
/// (selected by `batch_mem_limit` when `is_serving_node` is true).
const COMPUTE_BATCH_MEMORY_PROPORTION_FOR_SERVING: f64 = 0.6;

/// Each compute node reserves some memory for stack and code segment of processes, allocation
/// overhead, network buffer, etc. based on gradient reserve memory proportion. The reserve memory
Expand Down Expand Up @@ -299,8 +300,12 @@ pub fn storage_memory_config(
}
}

pub fn batch_mem_limit(compute_memory_bytes: usize) -> u64 {
(compute_memory_bytes as f64 * COMPUTE_BATCH_MEMORY_PROPORTION) as u64
/// Returns the memory budget, in bytes, granted to batch processing.
///
/// The budget is a fixed fraction of `compute_memory_bytes`: when `is_serving_node` is true
/// the fraction is `COMPUTE_BATCH_MEMORY_PROPORTION_FOR_SERVING`, otherwise it is
/// `COMPUTE_BATCH_MEMORY_PROPORTION_FOR_STREAMING`. The product is computed in `f64` and the
/// fractional part is discarded by the final cast to `u64`.
pub fn batch_mem_limit(compute_memory_bytes: usize, is_serving_node: bool) -> u64 {
    // Pick the fraction first so the multiply-and-cast is written only once.
    let proportion = if is_serving_node {
        COMPUTE_BATCH_MEMORY_PROPORTION_FOR_SERVING
    } else {
        COMPUTE_BATCH_MEMORY_PROPORTION_FOR_STREAMING
    };
    (compute_memory_bytes as f64 * proportion) as u64
}

#[cfg(test)]
Expand Down
2 changes: 1 addition & 1 deletion src/compute/src/server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,7 @@ pub async fn compute_node_serve(
let batch_mgr = Arc::new(BatchManager::new(
config.batch.clone(),
batch_manager_metrics,
batch_mem_limit(compute_memory_bytes),
batch_mem_limit(compute_memory_bytes, opts.role.for_serving()),
));

// NOTE: Due to some limits, we use `compute_memory_bytes + storage_memory_bytes` as
Expand Down

0 comments on commit 92e45bb

Please sign in to comment.