diff --git a/parquet/src/data_type.rs b/parquet/src/data_type.rs index 8316c3048a92..5bcd2062ca59 100644 --- a/parquet/src/data_type.rs +++ b/parquet/src/data_type.rs @@ -586,9 +586,9 @@ pub(crate) mod private { use crate::encodings::decoding::PlainDecoderDetails; use crate::util::bit_util::{read_num_bytes, BitReader, BitWriter}; - use crate::basic::Type; - use super::{ParquetError, Result, SliceAsBytes}; + use crate::basic::Type; + use crate::file::metadata::HeapSize; /// Sealed trait to start to remove specialisation from implementations /// @@ -606,6 +606,7 @@ pub(crate) mod private { + SliceAsBytes + PartialOrd + Send + + HeapSize + crate::encodings::decoding::private::GetDecoder + crate::file::statistics::private::MakeStatistics { @@ -654,13 +655,6 @@ pub(crate) mod private { /// Return the value as an mutable Any to allow for downcasts without transmutation fn as_mut_any(&mut self) -> &mut dyn std::any::Any; - - /// Returns the number of bytes of memory this instance uses on the heap. - /// - /// Defaults to none (0) - fn heap_size(&self) -> usize { - 0 - } } impl ParquetValueType for bool { @@ -893,6 +887,12 @@ pub(crate) mod private { } } + impl HeapSize for super::Int96 { + fn heap_size(&self) -> usize { + 0 // no heap allocations + } + } + impl ParquetValueType for super::ByteArray { const PHYSICAL_TYPE: Type = Type::BYTE_ARRAY; @@ -975,7 +975,9 @@ pub(crate) mod private { fn as_mut_any(&mut self) -> &mut dyn std::any::Any { self } + } + impl HeapSize for super::ByteArray { fn heap_size(&self) -> usize { // note: this is an estimate, not exact, so just return the size // of the actual data used, don't try to handle the fact that it may @@ -1068,7 +1070,9 @@ pub(crate) mod private { fn as_mut_any(&mut self) -> &mut dyn std::any::Any { self } + } + impl HeapSize for super::FixedLenByteArray { fn heap_size(&self) -> usize { self.0.heap_size() } diff --git a/parquet/src/file/metadata/memory.rs b/parquet/src/file/metadata/memory.rs index 8cc8060e4b6b..57b2f7eec0c2 100644 --- a/parquet/src/file/metadata/memory.rs +++ b/parquet/src/file/metadata/memory.rs @@ -28,7 +28,7 @@ use crate::format::{BoundaryOrder, PageLocation, SortingColumn}; use std::sync::Arc; /// Trait for calculating the size of various containers -pub(crate) trait HeapSize { +pub trait HeapSize { /// Return the size of any bytes allocated on the heap by this object, /// including heap memory in those structures /// @@ -176,11 +176,30 @@ impl HeapSize for ValueStatistics { self.min().heap_size() + self.max().heap_size() } } +impl HeapSize for bool { + fn heap_size(&self) -> usize { + 0 // no heap allocations + } +} +impl HeapSize for i32 { + fn heap_size(&self) -> usize { + 0 // no heap allocations + } +} +impl HeapSize for i64 { + fn heap_size(&self) -> usize { + 0 // no heap allocations + } +} -// Note this impl gets most primitive types like bool, i32, etc -impl HeapSize for T { +impl HeapSize for f32 { + fn heap_size(&self) -> usize { + 0 // no heap allocations + } +} +impl HeapSize for f64 { fn heap_size(&self) -> usize { - self.heap_size() + 0 // no heap allocations } }