diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index 3a07a238a4c9..5d3f2e6a84fa 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -446,10 +446,6 @@ config_namespace! { /// default parquet writer setting pub statistics_enabled: Option, default = Some("page".into()) - /// (writing) Sets max statistics size for any column. If NULL, uses - /// default parquet writer setting - pub max_statistics_size: Option, default = Some(4096) - /// (writing) Target maximum number of rows in each row group (defaults to 1M /// rows). Writing larger row groups requires more memory to write, but /// can get better compression and be faster to read. @@ -1621,10 +1617,6 @@ config_namespace_with_hashmap! { /// Sets bloom filter number of distinct values. If NULL, uses /// default parquet options pub bloom_filter_ndv: Option, default = None - - /// Sets max statistics size for the column path. If NULL, uses - /// default parquet options - pub max_statistics_size: Option, default = None } } diff --git a/datafusion/common/src/file_options/parquet_writer.rs b/datafusion/common/src/file_options/parquet_writer.rs index dd9d67d6bb47..fab50750eb41 100644 --- a/datafusion/common/src/file_options/parquet_writer.rs +++ b/datafusion/common/src/file_options/parquet_writer.rs @@ -26,7 +26,7 @@ use parquet::{ basic::{BrotliLevel, GzipLevel, ZstdLevel}, file::properties::{ EnabledStatistics, WriterProperties, WriterPropertiesBuilder, WriterVersion, - DEFAULT_MAX_STATISTICS_SIZE, DEFAULT_STATISTICS_ENABLED, + DEFAULT_STATISTICS_ENABLED, }, format::KeyValue, schema::types::ColumnPath, @@ -129,11 +129,6 @@ impl TryFrom<&TableParquetOptions> for WriterPropertiesBuilder { builder = builder.set_column_bloom_filter_ndv(path.clone(), bloom_filter_ndv); } - - if let Some(max_statistics_size) = options.max_statistics_size { - builder = - builder.set_column_max_statistics_size(path, max_statistics_size); - } } Ok(builder) @@ -154,7 +149,6 @@ impl ParquetOptions { dictionary_enabled, dictionary_page_size_limit, statistics_enabled, - max_statistics_size, max_row_group_size, created_by, column_index_truncate_length, @@ -190,9 +184,6 @@ impl ParquetOptions { .and_then(|s| parse_statistics_string(s).ok()) .unwrap_or(DEFAULT_STATISTICS_ENABLED), ) - .set_max_statistics_size( - max_statistics_size.unwrap_or(DEFAULT_MAX_STATISTICS_SIZE), - ) .set_max_row_group_size(*max_row_group_size) .set_created_by(created_by.clone()) .set_column_index_truncate_length(*column_index_truncate_length) @@ -395,7 +386,6 @@ mod tests { compression: Some("zstd(22)".into()), dictionary_enabled: src_col_defaults.dictionary_enabled.map(|v| !v), statistics_enabled: Some("none".into()), - max_statistics_size: Some(72), encoding: Some("RLE".into()), bloom_filter_enabled: Some(true), bloom_filter_fpp: Some(0.72), @@ -419,7 +409,6 @@ mod tests { dictionary_enabled: Some(!defaults.dictionary_enabled.unwrap_or(false)), dictionary_page_size_limit: 42, statistics_enabled: Some("chunk".into()), - max_statistics_size: Some(42), max_row_group_size: 42, created_by: "wordy".into(), column_index_truncate_length: Some(42), @@ -473,7 +462,6 @@ mod tests { ), bloom_filter_fpp: bloom_filter_default_props.map(|p| p.fpp), bloom_filter_ndv: bloom_filter_default_props.map(|p| p.ndv), - max_statistics_size: Some(props.max_statistics_size(&col)), } } @@ -523,7 +511,6 @@ mod tests { compression: default_col_props.compression, dictionary_enabled: default_col_props.dictionary_enabled, statistics_enabled: default_col_props.statistics_enabled, - max_statistics_size: default_col_props.max_statistics_size, bloom_filter_on_write: default_col_props .bloom_filter_enabled .unwrap_or_default(), diff --git a/datafusion/proto-common/proto/datafusion_common.proto b/datafusion/proto-common/proto/datafusion_common.proto index 2da8b6066742..d3c71cf628d3 100644 --- a/datafusion/proto-common/proto/datafusion_common.proto +++ b/datafusion/proto-common/proto/datafusion_common.proto @@ -472,10 +472,6 @@ message ParquetColumnOptions { oneof bloom_filter_ndv_opt { uint64 bloom_filter_ndv = 7; } - - oneof max_statistics_size_opt { - uint32 max_statistics_size = 8; - } } message ParquetOptions { @@ -513,10 +509,6 @@ message ParquetOptions { string statistics_enabled = 13; } - oneof max_statistics_size_opt { - uint64 max_statistics_size = 14; - } - oneof column_index_truncate_length_opt { uint64 column_index_truncate_length = 17; } diff --git a/datafusion/proto-common/src/from_proto/mod.rs b/datafusion/proto-common/src/from_proto/mod.rs index 11666486e49b..658f8c90ccd7 100644 --- a/datafusion/proto-common/src/from_proto/mod.rs +++ b/datafusion/proto-common/src/from_proto/mod.rs @@ -924,12 +924,6 @@ impl TryFrom<&protobuf::ParquetOptions> for ParquetOptions { protobuf::parquet_options::StatisticsEnabledOpt::StatisticsEnabled(v) => Some(v), }) .unwrap_or(None), - max_statistics_size: value - .max_statistics_size_opt.as_ref() - .map(|opt| match opt { - protobuf::parquet_options::MaxStatisticsSizeOpt::MaxStatisticsSize(v) => Some(*v as usize), - }) - .unwrap_or(None), max_row_group_size: value.max_row_group_size as usize, created_by: value.created_by.clone(), column_index_truncate_length: value @@ -984,12 +978,6 @@ impl TryFrom<&protobuf::ParquetColumnOptions> for ParquetColumnOptions { protobuf::parquet_column_options::StatisticsEnabledOpt::StatisticsEnabled(v) => Some(v), }) .unwrap_or(None), - max_statistics_size: value - .max_statistics_size_opt - .map(|opt| match opt { - protobuf::parquet_column_options::MaxStatisticsSizeOpt::MaxStatisticsSize(v) => Some(v as usize), - }) - .unwrap_or(None), encoding: value .encoding_opt.clone() .map(|opt| match opt { diff --git a/datafusion/proto-common/src/generated/pbjson.rs b/datafusion/proto-common/src/generated/pbjson.rs index 6a75b14d35a8..6a75056ba4f7 100644 --- a/datafusion/proto-common/src/generated/pbjson.rs +++ b/datafusion/proto-common/src/generated/pbjson.rs @@ -4448,9 +4448,6 @@ impl serde::Serialize for ParquetColumnOptions { if self.bloom_filter_ndv_opt.is_some() { len += 1; } - if self.max_statistics_size_opt.is_some() { - len += 1; - } let mut struct_ser = serializer.serialize_struct("datafusion_common.ParquetColumnOptions", len)?; if let Some(v) = self.bloom_filter_enabled_opt.as_ref() { match v { @@ -4503,13 +4500,6 @@ impl serde::Serialize for ParquetColumnOptions { } } } - if let Some(v) = self.max_statistics_size_opt.as_ref() { - match v { - parquet_column_options::MaxStatisticsSizeOpt::MaxStatisticsSize(v) => { - struct_ser.serialize_field("maxStatisticsSize", v)?; - } - } - } struct_ser.end() } } @@ -4532,8 +4522,6 @@ impl<'de> serde::Deserialize<'de> for ParquetColumnOptions { "bloomFilterFpp", "bloom_filter_ndv", "bloomFilterNdv", - "max_statistics_size", - "maxStatisticsSize", ]; #[allow(clippy::enum_variant_names)] @@ -4545,7 +4533,6 @@ impl<'de> serde::Deserialize<'de> for ParquetColumnOptions { StatisticsEnabled, BloomFilterFpp, BloomFilterNdv, - MaxStatisticsSize, } impl<'de> serde::Deserialize<'de> for GeneratedField { fn deserialize(deserializer: D) -> std::result::Result @@ -4574,7 +4561,6 @@ impl<'de> serde::Deserialize<'de> for ParquetColumnOptions { "statisticsEnabled" | "statistics_enabled" => Ok(GeneratedField::StatisticsEnabled), "bloomFilterFpp" | "bloom_filter_fpp" => Ok(GeneratedField::BloomFilterFpp), "bloomFilterNdv" | "bloom_filter_ndv" => Ok(GeneratedField::BloomFilterNdv), - "maxStatisticsSize" | "max_statistics_size" => Ok(GeneratedField::MaxStatisticsSize), _ => Err(serde::de::Error::unknown_field(value, FIELDS)), } } @@ -4601,7 +4587,6 @@ impl<'de> serde::Deserialize<'de> for ParquetColumnOptions { let mut statistics_enabled_opt__ = None; let mut bloom_filter_fpp_opt__ = None; let mut bloom_filter_ndv_opt__ = None; - let mut max_statistics_size_opt__ = None; while let Some(k) = map_.next_key()? { match k { GeneratedField::BloomFilterEnabled => { @@ -4646,12 +4631,6 @@ impl<'de> serde::Deserialize<'de> for ParquetColumnOptions { } bloom_filter_ndv_opt__ = map_.next_value::<::std::option::Option<::pbjson::private::NumberDeserialize<_>>>()?.map(|x| parquet_column_options::BloomFilterNdvOpt::BloomFilterNdv(x.0)); } - GeneratedField::MaxStatisticsSize => { - if max_statistics_size_opt__.is_some() { - return Err(serde::de::Error::duplicate_field("maxStatisticsSize")); - } - max_statistics_size_opt__ = map_.next_value::<::std::option::Option<::pbjson::private::NumberDeserialize<_>>>()?.map(|x| parquet_column_options::MaxStatisticsSizeOpt::MaxStatisticsSize(x.0)); - } } } Ok(ParquetColumnOptions { @@ -4662,7 +4641,6 @@ impl<'de> serde::Deserialize<'de> for ParquetColumnOptions { statistics_enabled_opt: statistics_enabled_opt__, bloom_filter_fpp_opt: bloom_filter_fpp_opt__, bloom_filter_ndv_opt: bloom_filter_ndv_opt__, - max_statistics_size_opt: max_statistics_size_opt__, }) } } @@ -4946,9 +4924,6 @@ impl serde::Serialize for ParquetOptions { if self.statistics_enabled_opt.is_some() { len += 1; } - if self.max_statistics_size_opt.is_some() { - len += 1; - } if self.column_index_truncate_length_opt.is_some() { len += 1; } @@ -5063,15 +5038,6 @@ impl serde::Serialize for ParquetOptions { } } } - if let Some(v) = self.max_statistics_size_opt.as_ref() { - match v { - parquet_options::MaxStatisticsSizeOpt::MaxStatisticsSize(v) => { - #[allow(clippy::needless_borrow)] - #[allow(clippy::needless_borrows_for_generic_args)] - struct_ser.serialize_field("maxStatisticsSize", ToString::to_string(&v).as_str())?; - } - } - } if let Some(v) = self.column_index_truncate_length_opt.as_ref() { match v { parquet_options::ColumnIndexTruncateLengthOpt::ColumnIndexTruncateLength(v) => { @@ -5158,8 +5124,6 @@ impl<'de> serde::Deserialize<'de> for ParquetOptions { "dictionaryEnabled", "statistics_enabled", "statisticsEnabled", - "max_statistics_size", - "maxStatisticsSize", "column_index_truncate_length", "columnIndexTruncateLength", "encoding", @@ -5194,7 +5158,6 @@ impl<'de> serde::Deserialize<'de> for ParquetOptions { Compression, DictionaryEnabled, StatisticsEnabled, - MaxStatisticsSize, ColumnIndexTruncateLength, Encoding, BloomFilterFpp, @@ -5243,7 +5206,6 @@ impl<'de> serde::Deserialize<'de> for ParquetOptions { "compression" => Ok(GeneratedField::Compression), "dictionaryEnabled" | "dictionary_enabled" => Ok(GeneratedField::DictionaryEnabled), "statisticsEnabled" | "statistics_enabled" => Ok(GeneratedField::StatisticsEnabled), - "maxStatisticsSize" | "max_statistics_size" => Ok(GeneratedField::MaxStatisticsSize), "columnIndexTruncateLength" | "column_index_truncate_length" => Ok(GeneratedField::ColumnIndexTruncateLength), "encoding" => Ok(GeneratedField::Encoding), "bloomFilterFpp" | "bloom_filter_fpp" => Ok(GeneratedField::BloomFilterFpp), @@ -5290,7 +5252,6 @@ impl<'de> serde::Deserialize<'de> for ParquetOptions { let mut compression_opt__ = None; let mut dictionary_enabled_opt__ = None; let mut statistics_enabled_opt__ = None; - let mut max_statistics_size_opt__ = None; let mut column_index_truncate_length_opt__ = None; let mut encoding_opt__ = None; let mut bloom_filter_fpp_opt__ = None; @@ -5449,12 +5410,6 @@ impl<'de> serde::Deserialize<'de> for ParquetOptions { } statistics_enabled_opt__ = map_.next_value::<::std::option::Option<_>>()?.map(parquet_options::StatisticsEnabledOpt::StatisticsEnabled); } - GeneratedField::MaxStatisticsSize => { - if max_statistics_size_opt__.is_some() { - return Err(serde::de::Error::duplicate_field("maxStatisticsSize")); - } - max_statistics_size_opt__ = map_.next_value::<::std::option::Option<::pbjson::private::NumberDeserialize<_>>>()?.map(|x| parquet_options::MaxStatisticsSizeOpt::MaxStatisticsSize(x.0)); - } GeneratedField::ColumnIndexTruncateLength => { if column_index_truncate_length_opt__.is_some() { return Err(serde::de::Error::duplicate_field("columnIndexTruncateLength")); @@ -5505,7 +5460,6 @@ impl<'de> serde::Deserialize<'de> for ParquetOptions { compression_opt: compression_opt__, dictionary_enabled_opt: dictionary_enabled_opt__, statistics_enabled_opt: statistics_enabled_opt__, - max_statistics_size_opt: max_statistics_size_opt__, column_index_truncate_length_opt: column_index_truncate_length_opt__, encoding_opt: encoding_opt__, bloom_filter_fpp_opt: bloom_filter_fpp_opt__, diff --git a/datafusion/proto-common/src/generated/prost.rs b/datafusion/proto-common/src/generated/prost.rs index 50a3cff5f568..d3f2bb5e250d 100644 --- a/datafusion/proto-common/src/generated/prost.rs +++ b/datafusion/proto-common/src/generated/prost.rs @@ -638,27 +638,29 @@ pub struct ParquetColumnSpecificOptions { #[derive(Clone, PartialEq, ::prost::Message)] pub struct ParquetColumnOptions { #[prost(oneof = "parquet_column_options::BloomFilterEnabledOpt", tags = "1")] - pub bloom_filter_enabled_opt: - ::core::option::Option, + pub bloom_filter_enabled_opt: ::core::option::Option< + parquet_column_options::BloomFilterEnabledOpt, + >, #[prost(oneof = "parquet_column_options::EncodingOpt", tags = "2")] pub encoding_opt: ::core::option::Option, #[prost(oneof = "parquet_column_options::DictionaryEnabledOpt", tags = "3")] - pub dictionary_enabled_opt: - ::core::option::Option, + pub dictionary_enabled_opt: ::core::option::Option< + parquet_column_options::DictionaryEnabledOpt, + >, #[prost(oneof = "parquet_column_options::CompressionOpt", tags = "4")] pub compression_opt: ::core::option::Option, #[prost(oneof = "parquet_column_options::StatisticsEnabledOpt", tags = "5")] - pub statistics_enabled_opt: - ::core::option::Option, + pub statistics_enabled_opt: ::core::option::Option< + parquet_column_options::StatisticsEnabledOpt, + >, #[prost(oneof = "parquet_column_options::BloomFilterFppOpt", tags = "6")] - pub bloom_filter_fpp_opt: - ::core::option::Option, + pub bloom_filter_fpp_opt: ::core::option::Option< + parquet_column_options::BloomFilterFppOpt, + >, #[prost(oneof = "parquet_column_options::BloomFilterNdvOpt", tags = "7")] - pub bloom_filter_ndv_opt: - ::core::option::Option, - #[prost(oneof = "parquet_column_options::MaxStatisticsSizeOpt", tags = "8")] - pub max_statistics_size_opt: - ::core::option::Option, + pub bloom_filter_ndv_opt: ::core::option::Option< + parquet_column_options::BloomFilterNdvOpt, + >, } /// Nested message and enum types in `ParquetColumnOptions`. pub mod parquet_column_options { @@ -697,11 +699,6 @@ pub mod parquet_column_options { #[prost(uint64, tag = "7")] BloomFilterNdv(u64), } - #[derive(Clone, Copy, PartialEq, ::prost::Oneof)] - pub enum MaxStatisticsSizeOpt { - #[prost(uint32, tag = "8")] - MaxStatisticsSize(u32), - } } #[derive(Clone, PartialEq, ::prost::Message)] pub struct ParquetOptions { @@ -763,22 +760,23 @@ pub struct ParquetOptions { #[prost(string, tag = "16")] pub created_by: ::prost::alloc::string::String, #[prost(oneof = "parquet_options::MetadataSizeHintOpt", tags = "4")] - pub metadata_size_hint_opt: - ::core::option::Option, + pub metadata_size_hint_opt: ::core::option::Option< + parquet_options::MetadataSizeHintOpt, + >, #[prost(oneof = "parquet_options::CompressionOpt", tags = "10")] pub compression_opt: ::core::option::Option, #[prost(oneof = "parquet_options::DictionaryEnabledOpt", tags = "11")] - pub dictionary_enabled_opt: - ::core::option::Option, + pub dictionary_enabled_opt: ::core::option::Option< + parquet_options::DictionaryEnabledOpt, + >, #[prost(oneof = "parquet_options::StatisticsEnabledOpt", tags = "13")] - pub statistics_enabled_opt: - ::core::option::Option, - #[prost(oneof = "parquet_options::MaxStatisticsSizeOpt", tags = "14")] - pub max_statistics_size_opt: - ::core::option::Option, + pub statistics_enabled_opt: ::core::option::Option< + parquet_options::StatisticsEnabledOpt, + >, #[prost(oneof = "parquet_options::ColumnIndexTruncateLengthOpt", tags = "17")] - pub column_index_truncate_length_opt: - ::core::option::Option, + pub column_index_truncate_length_opt: ::core::option::Option< + parquet_options::ColumnIndexTruncateLengthOpt, + >, #[prost(oneof = "parquet_options::EncodingOpt", tags = "19")] pub encoding_opt: ::core::option::Option, #[prost(oneof = "parquet_options::BloomFilterFppOpt", tags = "21")] @@ -809,11 +807,6 @@ pub mod parquet_options { StatisticsEnabled(::prost::alloc::string::String), } #[derive(Clone, Copy, PartialEq, ::prost::Oneof)] - pub enum MaxStatisticsSizeOpt { - #[prost(uint64, tag = "14")] - MaxStatisticsSize(u64), - } - #[derive(Clone, Copy, PartialEq, ::prost::Oneof)] pub enum ColumnIndexTruncateLengthOpt { #[prost(uint64, tag = "17")] ColumnIndexTruncateLength(u64), @@ -861,9 +854,7 @@ pub struct ColumnStats { #[prost(message, optional, tag = "4")] pub distinct_count: ::core::option::Option, } -#[derive( - Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration, -)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] #[repr(i32)] pub enum JoinType { Inner = 0, @@ -910,9 +901,7 @@ impl JoinType { } } } -#[derive( - Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration, -)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] #[repr(i32)] pub enum JoinConstraint { On = 0, @@ -938,9 +927,7 @@ impl JoinConstraint { } } } -#[derive( - Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration, -)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] #[repr(i32)] pub enum TimeUnit { Second = 0, @@ -972,9 +959,7 @@ impl TimeUnit { } } } -#[derive( - Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration, -)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] #[repr(i32)] pub enum IntervalUnit { YearMonth = 0, @@ -1003,9 +988,7 @@ impl IntervalUnit { } } } -#[derive( - Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration, -)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] #[repr(i32)] pub enum UnionMode { Sparse = 0, @@ -1031,9 +1014,7 @@ impl UnionMode { } } } -#[derive( - Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration, -)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] #[repr(i32)] pub enum CompressionTypeVariant { Gzip = 0, @@ -1068,9 +1049,7 @@ impl CompressionTypeVariant { } } } -#[derive( - Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration, -)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] #[repr(i32)] pub enum JoinSide { LeftSide = 0, @@ -1099,9 +1078,7 @@ impl JoinSide { } } } -#[derive( - Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration, -)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] #[repr(i32)] pub enum PrecisionInfo { Exact = 0, diff --git a/datafusion/proto-common/src/to_proto/mod.rs b/datafusion/proto-common/src/to_proto/mod.rs index 25bda957ff8f..2cb8e0eb07f5 100644 --- a/datafusion/proto-common/src/to_proto/mod.rs +++ b/datafusion/proto-common/src/to_proto/mod.rs @@ -820,7 +820,6 @@ impl TryFrom<&ParquetOptions> for protobuf::ParquetOptions { dictionary_enabled_opt: value.dictionary_enabled.map(protobuf::parquet_options::DictionaryEnabledOpt::DictionaryEnabled), dictionary_page_size_limit: value.dictionary_page_size_limit as u64, statistics_enabled_opt: value.statistics_enabled.clone().map(protobuf::parquet_options::StatisticsEnabledOpt::StatisticsEnabled), - max_statistics_size_opt: value.max_statistics_size.map(|v| protobuf::parquet_options::MaxStatisticsSizeOpt::MaxStatisticsSize(v as u64)), max_row_group_size: value.max_row_group_size as u64, created_by: value.created_by.clone(), column_index_truncate_length_opt: value.column_index_truncate_length.map(|v| protobuf::parquet_options::ColumnIndexTruncateLengthOpt::ColumnIndexTruncateLength(v as u64)), @@ -857,11 +856,6 @@ impl TryFrom<&ParquetColumnOptions> for protobuf::ParquetColumnOptions { .statistics_enabled .clone() .map(protobuf::parquet_column_options::StatisticsEnabledOpt::StatisticsEnabled), - max_statistics_size_opt: value.max_statistics_size.map(|v| { - protobuf::parquet_column_options::MaxStatisticsSizeOpt::MaxStatisticsSize( - v as u32, - ) - }), encoding_opt: value .encoding .clone() diff --git a/datafusion/proto/src/generated/datafusion_proto_common.rs b/datafusion/proto/src/generated/datafusion_proto_common.rs index fa77d23a6ae6..d3f2bb5e250d 100644 --- a/datafusion/proto/src/generated/datafusion_proto_common.rs +++ b/datafusion/proto/src/generated/datafusion_proto_common.rs @@ -661,10 +661,6 @@ pub struct ParquetColumnOptions { pub bloom_filter_ndv_opt: ::core::option::Option< parquet_column_options::BloomFilterNdvOpt, >, - #[prost(oneof = "parquet_column_options::MaxStatisticsSizeOpt", tags = "8")] - pub max_statistics_size_opt: ::core::option::Option< - parquet_column_options::MaxStatisticsSizeOpt, - >, } /// Nested message and enum types in `ParquetColumnOptions`. pub mod parquet_column_options { @@ -703,11 +699,6 @@ pub mod parquet_column_options { #[prost(uint64, tag = "7")] BloomFilterNdv(u64), } - #[derive(Clone, Copy, PartialEq, ::prost::Oneof)] - pub enum MaxStatisticsSizeOpt { - #[prost(uint32, tag = "8")] - MaxStatisticsSize(u32), - } } #[derive(Clone, PartialEq, ::prost::Message)] pub struct ParquetOptions { @@ -782,10 +773,6 @@ pub struct ParquetOptions { pub statistics_enabled_opt: ::core::option::Option< parquet_options::StatisticsEnabledOpt, >, - #[prost(oneof = "parquet_options::MaxStatisticsSizeOpt", tags = "14")] - pub max_statistics_size_opt: ::core::option::Option< - parquet_options::MaxStatisticsSizeOpt, - >, #[prost(oneof = "parquet_options::ColumnIndexTruncateLengthOpt", tags = "17")] pub column_index_truncate_length_opt: ::core::option::Option< parquet_options::ColumnIndexTruncateLengthOpt, @@ -820,11 +807,6 @@ pub mod parquet_options { StatisticsEnabled(::prost::alloc::string::String), } #[derive(Clone, Copy, PartialEq, ::prost::Oneof)] - pub enum MaxStatisticsSizeOpt { - #[prost(uint64, tag = "14")] - MaxStatisticsSize(u64), - } - #[derive(Clone, Copy, PartialEq, ::prost::Oneof)] pub enum ColumnIndexTruncateLengthOpt { #[prost(uint64, tag = "17")] ColumnIndexTruncateLength(u64), diff --git a/datafusion/proto/src/logical_plan/file_formats.rs b/datafusion/proto/src/logical_plan/file_formats.rs index 1e2b12dacc29..715c649e97c8 100644 --- a/datafusion/proto/src/logical_plan/file_formats.rs +++ b/datafusion/proto/src/logical_plan/file_formats.rs @@ -379,9 +379,6 @@ impl TableParquetOptionsProto { statistics_enabled_opt: global_options.global.statistics_enabled.map(|enabled| { parquet_options::StatisticsEnabledOpt::StatisticsEnabled(enabled) }), - max_statistics_size_opt: global_options.global.max_statistics_size.map(|size| { - parquet_options::MaxStatisticsSizeOpt::MaxStatisticsSize(size as u64) - }), max_row_group_size: global_options.global.max_row_group_size as u64, created_by: global_options.global.created_by.clone(), column_index_truncate_length_opt: global_options.global.column_index_truncate_length.map(|length| { @@ -430,9 +427,6 @@ impl TableParquetOptionsProto { bloom_filter_ndv_opt: options.bloom_filter_ndv.map(|ndv| { parquet_column_options::BloomFilterNdvOpt::BloomFilterNdv(ndv) }), - max_statistics_size_opt: options.max_statistics_size.map(|size| { - parquet_column_options::MaxStatisticsSizeOpt::MaxStatisticsSize(size as u32) - }), }) } }).collect(), @@ -470,9 +464,6 @@ impl From<&ParquetOptionsProto> for ParquetOptions { statistics_enabled: proto.statistics_enabled_opt.as_ref().map(|opt| match opt { parquet_options::StatisticsEnabledOpt::StatisticsEnabled(statistics) => statistics.clone(), }), - max_statistics_size: proto.max_statistics_size_opt.as_ref().map(|opt| match opt { - parquet_options::MaxStatisticsSizeOpt::MaxStatisticsSize(size) => *size as usize, - }), max_row_group_size: proto.max_row_group_size as usize, created_by: proto.created_by.clone(), column_index_truncate_length: proto.column_index_truncate_length_opt.as_ref().map(|opt| match opt { @@ -523,11 +514,6 @@ impl From for ParquetColumnOptions { bloom_filter_ndv: proto .bloom_filter_ndv_opt .map(|parquet_column_options::BloomFilterNdvOpt::BloomFilterNdv(v)| v), - max_statistics_size: proto.max_statistics_size_opt.map( - |parquet_column_options::MaxStatisticsSizeOpt::MaxStatisticsSize(v)| { - v as usize - }, - ), } } }