From e8a0b7f3af42c3cbb62e4039c22435ab296ebeda Mon Sep 17 00:00:00 2001 From: Ed Seidl Date: Fri, 26 Jul 2024 13:17:04 -0700 Subject: [PATCH] add to_thrift to NativeIndex in prep for #6000 --- parquet/src/file/page_index/index.rs | 49 +++++++++++++++++++++ parquet/src/file/page_index/offset_index.rs | 2 +- 2 files changed, 50 insertions(+), 1 deletion(-) diff --git a/parquet/src/file/page_index/index.rs b/parquet/src/file/page_index/index.rs index cebb602b31a1..1346cf08e6e7 100644 --- a/parquet/src/file/page_index/index.rs +++ b/parquet/src/file/page_index/index.rs @@ -225,6 +225,55 @@ impl NativeIndex { boundary_order: index.boundary_order, }) } + + // TODO: remove annotation after merge with #6000 + #[allow(dead_code)] + pub(crate) fn to_thrift(&self) -> ColumnIndex { + let min_values = self + .indexes + .iter() + .map(|x| x.min_bytes().map(|x| x.to_vec())) + .collect::<Option<Vec<_>>>() + .unwrap_or_else(|| vec![vec![]; self.indexes.len()]); + + let max_values = self + .indexes + .iter() + .map(|x| x.max_bytes().map(|x| x.to_vec())) + .collect::<Option<Vec<_>>>() + .unwrap_or_else(|| vec![vec![]; self.indexes.len()]); + + let null_counts = self + .indexes + .iter() + .map(|x| x.null_count()) + .collect::<Option<Vec<_>>>(); + + // Concatenate page histograms into a single Option<Vec> + let repetition_level_histograms = self + .indexes + .iter() + .map(|x| x.repetition_level_histogram().map(|v| v.values())) + .collect::<Option<Vec<_>>>() + .map(|hists| hists.concat()); + + let definition_level_histograms = self + .indexes + .iter() + .map(|x| x.definition_level_histogram().map(|v| v.values())) + .collect::<Option<Vec<_>>>() + .map(|hists| hists.concat()); + + ColumnIndex::new( + self.indexes.iter().map(|x| x.min().is_none()).collect(), + min_values, + max_values, + self.boundary_order, + null_counts, + repetition_level_histograms, + definition_level_histograms, + ) + } } #[cfg(test)] diff --git a/parquet/src/file/page_index/offset_index.rs b/parquet/src/file/page_index/offset_index.rs index 2ae3464141ca..1cee696a5603 100644 ---
a/parquet/src/file/page_index/offset_index.rs +++ b/parquet/src/file/page_index/offset_index.rs @@ -48,7 +48,7 @@ impl OffsetIndexMetaData { self.unencoded_byte_array_data_bytes.as_ref() } - // TODO: remove annotation after merge + // TODO: remove annotation after merge with #6000 #[allow(dead_code)] pub(crate) fn to_thrift(&self) -> OffsetIndex { OffsetIndex::new(