From 90c45e0390c9913030c13e4ee0f3a09b2eeb8707 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 5 Dec 2024 15:26:51 -0500 Subject: [PATCH] compiles --- parquet/src/arrow/schema/mod.rs | 35 ++++++++++++++++++++------------- parquet/src/file/properties.rs | 5 ++--- 2 files changed, 23 insertions(+), 17 deletions(-) diff --git a/parquet/src/arrow/schema/mod.rs b/parquet/src/arrow/schema/mod.rs index a1ea23728bc6..4b5e0dadc111 100644 --- a/parquet/src/arrow/schema/mod.rs +++ b/parquet/src/arrow/schema/mod.rs @@ -227,13 +227,16 @@ pub(crate) fn add_encoded_arrow_schema_to_metadata(schema: &Schema, props: &mut /// Converter for arrow schema to parquet schema /// -/// Notes: +/// Example: +/// ``` +/// todo!() +/// ``` #[derive(Debug)] pub struct ArrowToParquetSchemaConverter<'a> { /// The schema to convert - schema: &'a Schema + schema: &'a Schema, /// Name of the root schema in Parquet - root_schema_name: &str, + root_schema_name: &'a str, /// Should we Coerce arrow types to compatible Parquet types? /// /// See docs on [Self::with_coerce_types]` @@ -275,14 +278,21 @@ impl <'a> ArrowToParquetSchemaConverter<'a> { } /// Set the root schema element name (defaults to `"arrow_schema"`). 
- pub fn with_root_schema_name<'b>(mut self, root_schema_name: &'b str) -> Self<'b> { + pub fn with_root_schema_name(mut self, root_schema_name: &'a str) -> Self { self.root_schema_name = root_schema_name; self } - /// Build the parquet schema according - pub fn convert(self) -> Result<SchemaDescriptor> { - + /// Build the desired parquet [`SchemaDescriptor`] + pub fn build(self) -> Result<SchemaDescriptor> { + let Self { schema, root_schema_name, coerce_types } = self; + let fields = schema + .fields() + .iter() + .map(|field| arrow_to_parquet_type(field, coerce_types).map(Arc::new)) + .collect::<Result<_>>()?; + let group = Type::group_type_builder(root_schema_name).with_fields(fields).build()?; + Ok(SchemaDescriptor::new(Arc::new(group))) } } @@ -300,13 +310,10 @@ pub fn arrow_to_parquet_schema_with_root( root: &str, coerce_types: bool, ) -> Result<SchemaDescriptor> { - let fields = schema - .fields() - .iter() - .map(|field| arrow_to_parquet_type(field, coerce_types).map(Arc::new)) - .collect::<Result<_>>()?; - let group = Type::group_type_builder(root).with_fields(fields).build()?; - Ok(SchemaDescriptor::new(Arc::new(group))) + ArrowToParquetSchemaConverter::new(schema) + .with_root_schema_name(root) + .with_coerce_types(coerce_types) + .build() } fn parse_key_value_metadata( diff --git a/parquet/src/file/properties.rs b/parquet/src/file/properties.rs index b6dbb2ff552b..53d51b914d2b 100644 --- a/parquet/src/file/properties.rs +++ b/parquet/src/file/properties.rs @@ -16,14 +16,13 @@ // under the License. //! 
Configuration via [`WriterProperties`] and [`ReaderProperties`] -use std::str::FromStr; -use std::{collections::HashMap, sync::Arc}; -use arrow_schema::DataType; use crate::basic::{Compression, Encoding}; use crate::compression::{CodecOptions, CodecOptionsBuilder}; use crate::file::metadata::KeyValue; use crate::format::SortingColumn; use crate::schema::types::ColumnPath; +use std::str::FromStr; +use std::{collections::HashMap, sync::Arc}; /// Default value for [`WriterProperties::data_page_size_limit`] pub const DEFAULT_PAGE_SIZE: usize = 1024 * 1024;