Skip to content

Commit

Permalink
add to_thrift to NativeIndex in prep for apache#6000
Browse files Browse the repository at this point in the history
  • Loading branch information
etseidl committed Jul 26, 2024
1 parent 2f7a9ac commit e8a0b7f
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 1 deletion.
49 changes: 49 additions & 0 deletions parquet/src/file/page_index/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,55 @@ impl<T: ParquetValueType> NativeIndex<T> {
boundary_order: index.boundary_order,
})
}

/// Converts this index into its Thrift [`ColumnIndex`] form.
///
/// One entry is produced per element of `self.indexes`:
/// - `null_pages` is `true` for every page whose `min()` is `None`.
/// - `min_values` / `max_values` hold the page's min/max bytes, or an empty
///   byte vector for a page that has none. The fallback is applied per page,
///   so a single page without statistics does not erase the statistics of
///   the remaining pages.
/// - `null_counts` is `Some` only when every page reports a null count.
/// - The repetition and definition level histograms are each the
///   concatenation of the per-page histograms, and are `Some` only when
///   every page carries that histogram.
// TODO: remove annotation after merge with #6000
#[allow(dead_code)]
pub(crate) fn to_thrift(&self) -> ColumnIndex {
    // Per-page fallback: a page without a minimum contributes an empty value
    // instead of forcing every page's minimum to be dropped.
    let min_values = self
        .indexes
        .iter()
        .map(|x| x.min_bytes().map(|b| b.to_vec()).unwrap_or_default())
        .collect::<Vec<_>>();

    // Same per-page fallback for the maximum.
    let max_values = self
        .indexes
        .iter()
        .map(|x| x.max_bytes().map(|b| b.to_vec()).unwrap_or_default())
        .collect::<Vec<_>>();

    // All-or-nothing: a single missing null count makes the whole list `None`.
    let null_counts = self
        .indexes
        .iter()
        .map(|x| x.null_count())
        .collect::<Option<Vec<_>>>();

    // Concatenate page histograms into a single Option<Vec>
    let repetition_level_histograms = self
        .indexes
        .iter()
        .map(|x| x.repetition_level_histogram().map(|v| v.values()))
        .collect::<Option<Vec<&[i64]>>>()
        .map(|hists| hists.concat());

    let definition_level_histograms = self
        .indexes
        .iter()
        .map(|x| x.definition_level_histogram().map(|v| v.values()))
        .collect::<Option<Vec<&[i64]>>>()
        .map(|hists| hists.concat());

    ColumnIndex::new(
        // A page with no minimum value is flagged as a null page.
        self.indexes.iter().map(|x| x.min().is_none()).collect(),
        min_values,
        max_values,
        self.boundary_order,
        null_counts,
        repetition_level_histograms,
        definition_level_histograms,
    )
}
}

#[cfg(test)]
Expand Down
2 changes: 1 addition & 1 deletion parquet/src/file/page_index/offset_index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ impl OffsetIndexMetaData {
self.unencoded_byte_array_data_bytes.as_ref()
}

// TODO: remove annotation after merge
// TODO: remove annotation after merge with #6000
#[allow(dead_code)]
pub(crate) fn to_thrift(&self) -> OffsetIndex {
OffsetIndex::new(
Expand Down

0 comments on commit e8a0b7f

Please sign in to comment.