Commit
deprecate
alamb committed Dec 5, 2024
1 parent 90c45e0 commit b0d96be
Showing 3 changed files with 28 additions and 24 deletions.
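
For orientation: this commit deprecates the free functions arrow_to_parquet_schema and arrow_to_parquet_schema_with_root in favor of the ArrowToParquetSchemaConverter builder (the name used at this commit) re-exported from parquet::arrow. Below is a minimal migration sketch using only the builder methods visible in the diff; the schema, the "my_root" name, and the main wrapper are illustrative, not part of the change.

use arrow_schema::{DataType, Field, Schema};
use parquet::arrow::ArrowToParquetSchemaConverter;

fn main() -> parquet::errors::Result<()> {
    // Illustrative Arrow schema; any schema works the same way.
    let schema = Schema::new(vec![Field::new("id", DataType::Int64, false)]);

    // Before: arrow_to_parquet_schema_with_root(&schema, "my_root", true)?
    // After: configure the builder and call build()
    let descriptor = ArrowToParquetSchemaConverter::new(&schema)
        .with_coerce_types(true)
        .schema_root("my_root")
        .build()?;

    // The resulting parquet SchemaDescriptor is inspected as before.
    println!("{} leaf columns", descriptor.columns().len());
    Ok(())
}
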
16 changes: 8 additions & 8 deletions parquet/src/arrow/arrow_writer/mod.rs
@@ -30,12 +30,10 @@ use arrow_array::types::*;
use arrow_array::{ArrayRef, RecordBatch, RecordBatchWriter};
use arrow_schema::{ArrowError, DataType as ArrowDataType, Field, IntervalUnit, SchemaRef};

use super::schema::{
add_encoded_arrow_schema_to_metadata, arrow_to_parquet_schema,
arrow_to_parquet_schema_with_root, decimal_length_from_precision,
};
use super::schema::{add_encoded_arrow_schema_to_metadata, decimal_length_from_precision};

use crate::arrow::arrow_writer::byte_array::ByteArrayEncoder;
use crate::arrow::ArrowToParquetSchemaConverter;
use crate::column::page::{CompressedPage, PageWriteSpec, PageWriter};
use crate::column::writer::encoder::ColumnValueEncoder;
use crate::column::writer::{
@@ -181,10 +179,12 @@ impl<W: Write + Send> ArrowWriter<W> {
options: ArrowWriterOptions,
) -> Result<Self> {
let mut props = options.properties;
let schema = match options.schema_root {
Some(s) => arrow_to_parquet_schema_with_root(&arrow_schema, &s, props.coerce_types())?,
None => arrow_to_parquet_schema(&arrow_schema, props.coerce_types())?,
};
let mut converter = ArrowToParquetSchemaConverter::new(&arrow_schema)
.with_coerce_types(props.coerce_types());
if let Some(s) = &options.schema_root {
converter = converter.schema_root(s);
}
let schema = converter.build()?;
if !options.skip_arrow_metadata {
// add serialized arrow schema
add_encoded_arrow_schema_to_metadata(&arrow_schema, &mut props);
4 changes: 2 additions & 2 deletions parquet/src/arrow/mod.rs
@@ -117,8 +117,8 @@ use crate::schema::types::SchemaDescriptor;
use arrow_schema::{FieldRef, Schema};

pub use self::schema::{
arrow_to_parquet_schema, parquet_to_arrow_field_levels, parquet_to_arrow_schema,
parquet_to_arrow_schema_by_columns, FieldLevels,
parquet_to_arrow_field_levels, parquet_to_arrow_schema, parquet_to_arrow_schema_by_columns,
ArrowToParquetSchemaConverter, FieldLevels,
};

/// Schema metadata key used to store serialized Arrow IPC schema
32 changes: 18 additions & 14 deletions parquet/src/arrow/schema/mod.rs
@@ -236,7 +236,7 @@ pub struct ArrowToParquetSchemaConverter<'a> {
/// The schema to convert
schema: &'a Schema,
/// Name of the root schema in Parquet
root_schema_name: &'a str,
schema_root: &'a str,
/// Should we coerce arrow types to compatible Parquet types?
///
/// See docs on [`Self::with_coerce_types`]
@@ -248,7 +248,7 @@ impl <'a> ArrowToParquetSchemaConverter<'a> {
pub fn new(schema: &'a Schema) -> Self {
Self {
schema,
root_schema_name: "arrow_schema",
schema_root: "arrow_schema",
coerce_types: false,
}
}
@@ -278,14 +278,14 @@ impl <'a> ArrowToParquetSchemaConverter<'a> {
}

/// Set the root schema element name (defaults to `"arrow_schema"`).
pub fn with_root_schema_name(mut self, root_schema_name: &'a str) -> Self {
self.root_schema_name = root_schema_name;
pub fn schema_root(mut self, schema_root: &'a str) -> Self {
self.schema_root = schema_root;
self
}

/// Build the desired parquet [`SchemaDescriptor`]
pub fn build(self) -> Result<SchemaDescriptor> {
let Self { schema, root_schema_name, coerce_types } = self;
let Self { schema, schema_root: root_schema_name, coerce_types } = self;
let fields = schema
.fields()
.iter()
@@ -300,19 +300,20 @@ impl <'a> ArrowToParquetSchemaConverter<'a> {
///
/// The name of the root schema element defaults to `"arrow_schema"`; this can be
/// overridden with [`arrow_to_parquet_schema_with_root`]
pub fn arrow_to_parquet_schema(schema: &Schema, coerce_types: bool) -> Result<SchemaDescriptor> {
arrow_to_parquet_schema_with_root(schema, "arrow_schema", coerce_types)
#[deprecated(since = "54.0.0", note = "Use `ArrowToParquetSchemaConverter` instead")]
pub fn arrow_to_parquet_schema(schema: &Schema) -> Result<SchemaDescriptor> {

ArrowToParquetSchemaConverter::new(schema).build()
}

/// Convert arrow schema to parquet schema specifying the name of the root schema element
#[deprecated(since = "54.0.0", note = "Use `ArrowToParquetSchemaConverter` instead")]
pub fn arrow_to_parquet_schema_with_root(
schema: &Schema,
root: &str,
coerce_types: bool,
) -> Result<SchemaDescriptor> {
ArrowToParquetSchemaConverter::new(schema)
.with_root_schema_name(root)
.with_coerce_types(coerce_types)
.schema_root(root)
.build()
}
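
For reference, given the defaults shown earlier in this file (schema_root = "arrow_schema", coerce_types = false), the two deprecated wrappers reduce to the builder calls below. This is a sketch written against the public parquet::arrow paths; it assumes only the methods and types that appear in this diff.

use arrow_schema::Schema;
use parquet::arrow::ArrowToParquetSchemaConverter;
use parquet::errors::Result;
use parquet::schema::types::SchemaDescriptor;

// Equivalent of the deprecated arrow_to_parquet_schema(&schema)
fn to_parquet_schema(schema: &Schema) -> Result<SchemaDescriptor> {
    ArrowToParquetSchemaConverter::new(schema).build()
}

// Equivalent of the deprecated arrow_to_parquet_schema_with_root(&schema, root, coerce_types)
fn to_parquet_schema_with_root(
    schema: &Schema,
    root: &str,
    coerce_types: bool,
) -> Result<SchemaDescriptor> {
    ArrowToParquetSchemaConverter::new(schema)
        .with_coerce_types(coerce_types)
        .schema_root(root)
        .build()
}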

@@ -1637,7 +1638,7 @@ mod tests {
Field::new("decimal256", DataType::Decimal256(39, 2), false),
];
let arrow_schema = Schema::new(arrow_fields);
let converted_arrow_schema = arrow_to_parquet_schema(&arrow_schema, false).unwrap();
let converted_arrow_schema = ArrowToParquetSchemaConverter::new(&arrow_schema).build().unwrap();

assert_eq!(
parquet_schema.columns().len(),
@@ -1674,9 +1675,10 @@ mod tests {
false,
)];
let arrow_schema = Schema::new(arrow_fields);
let converted_arrow_schema = arrow_to_parquet_schema(&arrow_schema, true);
let converted_arrow_schema = ArrowToParquetSchemaConverter::new(&arrow_schema)
.with_coerce_types(true)
.build();

assert!(converted_arrow_schema.is_err());
converted_arrow_schema.unwrap();
}

@@ -1946,7 +1948,9 @@ mod tests {
// don't pass metadata so field ids are read from Parquet and not from serialized Arrow schema
let arrow_schema = crate::arrow::parquet_to_arrow_schema(&schema_descriptor, None)?;

let parq_schema_descr = crate::arrow::arrow_to_parquet_schema(&arrow_schema, true)?;
let parq_schema_descr = crate::arrow::ArrowToParquetSchemaConverter::new(&arrow_schema)
.with_coerce_types(true)
.build()?;
let parq_fields = parq_schema_descr.root_schema().get_fields();
assert_eq!(parq_fields.len(), 2);
assert_eq!(parq_fields[0].get_basic_info().id(), 1);
