Skip to content

Commit

Permalink
Merge pull request #51 from alamb/alamb/memory_accounting_more
Browse files Browse the repository at this point in the history
Require HeapSize for ParquetValueType
  • Loading branch information
alamb authored Jul 1, 2024
2 parents be3c014 + c1a3fd7 commit 60c03eb
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 13 deletions.
22 changes: 13 additions & 9 deletions parquet/src/data_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -586,9 +586,9 @@ pub(crate) mod private {
use crate::encodings::decoding::PlainDecoderDetails;
use crate::util::bit_util::{read_num_bytes, BitReader, BitWriter};

use crate::basic::Type;

use super::{ParquetError, Result, SliceAsBytes};
use crate::basic::Type;
use crate::file::metadata::HeapSize;

/// Sealed trait to start to remove specialisation from implementations
///
Expand All @@ -606,6 +606,7 @@ pub(crate) mod private {
+ SliceAsBytes
+ PartialOrd
+ Send
+ HeapSize
+ crate::encodings::decoding::private::GetDecoder
+ crate::file::statistics::private::MakeStatistics
{
Expand Down Expand Up @@ -654,13 +655,6 @@ pub(crate) mod private {

/// Return the value as a mutable Any to allow for downcasts without transmutation
fn as_mut_any(&mut self) -> &mut dyn std::any::Any;

/// Returns the number of bytes of memory this instance uses on the heap.
///
/// The default implementation returns 0, i.e. it reports no heap usage.
fn heap_size(&self) -> usize {
0
}
}

impl ParquetValueType for bool {
Expand Down Expand Up @@ -893,6 +887,12 @@ pub(crate) mod private {
}
}

impl HeapSize for super::Int96 {
    /// Always zero: this impl treats `Int96` as having no heap allocations.
    fn heap_size(&self) -> usize {
        0
    }
}

impl ParquetValueType for super::ByteArray {
const PHYSICAL_TYPE: Type = Type::BYTE_ARRAY;

Expand Down Expand Up @@ -975,7 +975,9 @@ pub(crate) mod private {
/// Returns `self` as a mutable `dyn Any` reference so that callers can downcast it.
fn as_mut_any(&mut self) -> &mut dyn std::any::Any {
self
}
}

impl HeapSize for super::ByteArray {
fn heap_size(&self) -> usize {
// note: this is an estimate, not exact, so just return the size
// of the actual data used, don't try to handle the fact that it may
Expand Down Expand Up @@ -1068,7 +1070,9 @@ pub(crate) mod private {
/// Returns `self` as a mutable `dyn Any` reference so that callers can downcast it.
fn as_mut_any(&mut self) -> &mut dyn std::any::Any {
self
}
}

impl HeapSize for super::FixedLenByteArray {
/// Reports the heap usage of the wrapped value (`self.0`) by delegating to its `heap_size`.
fn heap_size(&self) -> usize {
self.0.heap_size()
}
Expand Down
27 changes: 23 additions & 4 deletions parquet/src/file/metadata/memory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ use crate::format::{BoundaryOrder, PageLocation, SortingColumn};
use std::sync::Arc;

/// Trait for calculating the size of various containers
pub(crate) trait HeapSize {
pub trait HeapSize {
/// Return the size of any bytes allocated on the heap by this object,
/// including heap memory in those structures
///
Expand Down Expand Up @@ -176,11 +176,30 @@ impl<T: ParquetValueType> HeapSize for ValueStatistics<T> {
self.min().heap_size() + self.max().heap_size()
}
}
impl HeapSize for bool {
    /// Always zero: a `bool` has no heap allocations.
    fn heap_size(&self) -> usize {
        0
    }
}
impl HeapSize for i32 {
    /// Always zero: an `i32` has no heap allocations.
    fn heap_size(&self) -> usize {
        0
    }
}
impl HeapSize for i64 {
    /// Always zero: an `i64` has no heap allocations.
    fn heap_size(&self) -> usize {
        0
    }
}

// Note this impl gets most primitive types like bool, i32, etc
impl<T: ParquetValueType> HeapSize for T {
impl HeapSize for f32 {
fn heap_size(&self) -> usize {
0 // no heap allocations
}
}
impl HeapSize for f64 {
fn heap_size(&self) -> usize {
self.heap_size()
0 // no heap allocations
}
}

Expand Down

0 comments on commit 60c03eb

Please sign in to comment.