Skip to content

Commit

Permalink
Lazy-allocate pusher memory.
Browse files Browse the repository at this point in the history
The current implementation of channel pushers preallocates
`Message::default_length` entries per pusher.  For very large graphs this
adds up to a lot of memory being allocated even when the system is idle
with no outstanding messages.  This patch changes the allocation policy
to only allocate channel memory when there are messages to send and to
deallocate it when there are no messages, reducing the memory footprint
to 0 in idle state at the cost of some potential slow-down due to a
larger number of allocations.

See TimelyDataflow#394.
  • Loading branch information
ryzhyk committed Jun 25, 2021
1 parent 209af99 commit b8aeed0
Show file tree
Hide file tree
Showing 5 changed files with 17 additions and 17 deletions.
12 changes: 0 additions & 12 deletions timely/src/dataflow/channels/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,16 +45,4 @@ impl<T, D> Message<T, D> {
let mut bundle = Some(Bundle::from_typed(message));

pusher.push(&mut bundle);

if let Some(message) = bundle {
if let Some(message) = message.if_typed() {
*buffer = message.data;
buffer.clear();
}
}

// TODO: Unclear we always want this here.
if buffer.capacity() != Self::default_length() {
*buffer = Vec::with_capacity(Self::default_length());
}
}}
5 changes: 4 additions & 1 deletion timely/src/dataflow/channels/pushers/buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ impl<T, D, P: Push<Bundle<T, D>>> Buffer<T, D, P> where T: Eq+Clone {
pub fn new(pusher: P) -> Buffer<T, D, P> {
Buffer {
time: None,
buffer: Vec::with_capacity(Message::<T, D>::default_length()),
buffer: Vec::new(),
pusher,
}
}
Expand Down Expand Up @@ -65,6 +65,9 @@ impl<T, D, P: Push<Bundle<T, D>>> Buffer<T, D, P> where T: Eq+Clone {

// internal method for use by `Session`.
fn give(&mut self, data: D) {
if self.buffer.capacity() == 0 {
self.buffer = Vec::with_capacity(Message::<T, D>::default_length());
}
self.buffer.push(data);
// assert!(self.buffer.capacity() == Message::<O::Data>::default_length());
if self.buffer.len() == self.buffer.capacity() {
Expand Down
8 changes: 7 additions & 1 deletion timely/src/dataflow/channels/pushers/exchange.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ impl<T: Clone, D, P: Push<Bundle<T, D>>, H: FnMut(&T, &D)->u64> Exchange<T, D,
pub fn new(pushers: Vec<P>, key: H) -> Exchange<T, D, P, H> {
let mut buffers = vec![];
for _ in 0..pushers.len() {
buffers.push(Vec::with_capacity(Message::<T, D>::default_length()));
buffers.push(Vec::new());
}
Exchange {
pushers,
Expand Down Expand Up @@ -64,6 +64,9 @@ impl<T: Eq+Data, D: Data, P: Push<Bundle<T, D>>, H: FnMut(&T, &D)->u64> Push<Bun
for datum in data.drain(..) {
let index = (((self.hash_func)(time, &datum)) & mask) as usize;

if self.buffers[index].capacity() == 0 {
self.buffers[index] = Vec::with_capacity(Message::<T, D>::default_length());
}
self.buffers[index].push(datum);
if self.buffers[index].len() == self.buffers[index].capacity() {
self.flush(index);
Expand All @@ -82,6 +85,9 @@ impl<T: Eq+Data, D: Data, P: Push<Bundle<T, D>>, H: FnMut(&T, &D)->u64> Push<Bun
else {
for datum in data.drain(..) {
let index = (((self.hash_func)(time, &datum)) % self.pushers.len() as u64) as usize;
if self.buffers[index].capacity() == 0 {
self.buffers[index] = Vec::with_capacity(Message::<T, D>::default_length());
}
self.buffers[index].push(datum);
if self.buffers[index].len() == self.buffers[index].capacity() {
self.flush(index);
Expand Down
2 changes: 1 addition & 1 deletion timely/src/dataflow/channels/pushers/tee.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ impl<T, D> Tee<T, D> {
pub fn new() -> (Tee<T, D>, TeeHelper<T, D>) {
let shared = Rc::new(RefCell::new(Vec::new()));
let port = Tee {
buffer: Vec::with_capacity(Message::<T, D>::default_length()),
buffer: Vec::new(),
shared: shared.clone(),
};

Expand Down
7 changes: 5 additions & 2 deletions timely/src/dataflow/operators/input.rs
Original file line number Diff line number Diff line change
Expand Up @@ -212,8 +212,8 @@ impl<T:Timestamp, D: Data> Handle<T, D> {
activate: Vec::new(),
progress: Vec::new(),
pushers: Vec::new(),
buffer1: Vec::with_capacity(Message::<T, D>::default_length()),
buffer2: Vec::with_capacity(Message::<T, D>::default_length()),
buffer1: Vec::new(),
buffer2: Vec::new(),
now_at: T::minimum(),
}
}
Expand Down Expand Up @@ -305,6 +305,9 @@ impl<T:Timestamp, D: Data> Handle<T, D> {
/// Sends one record into the corresponding timely dataflow `Stream`, at the current epoch.
pub fn send(&mut self, data: D) {
// assert!(self.buffer1.capacity() == Message::<T, D>::default_length());
if self.buffer1.capacity() == 0 {
self.buffer1 = Vec::with_capacity(Message::<T, D>::default_length());
}
self.buffer1.push(data);
if self.buffer1.len() == self.buffer1.capacity() {
self.flush();
Expand Down

0 comments on commit b8aeed0

Please sign in to comment.