From 81aa7a4caf82f75f741a9305b9e31f2df95687d4 Mon Sep 17 00:00:00 2001 From: Yingwen Date: Mon, 9 Oct 2023 19:10:12 +0800 Subject: [PATCH] chore(mito): change default batch size/row group size (#2550) --- src/mito2/src/sst/parquet.rs | 5 ++++- src/mito2/src/sst/parquet/reader.rs | 5 +++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/mito2/src/sst/parquet.rs b/src/mito2/src/sst/parquet.rs index 0cced7e2575c..872c0e410408 100644 --- a/src/mito2/src/sst/parquet.rs +++ b/src/mito2/src/sst/parquet.rs @@ -26,7 +26,10 @@ use crate::sst::file::FileTimeRange; /// Key of metadata in parquet SST. pub const PARQUET_METADATA_KEY: &str = "greptime:metadata"; const DEFAULT_WRITE_BUFFER_SIZE: ReadableSize = ReadableSize::mb(8); -const DEFAULT_ROW_GROUP_SIZE: usize = 100000; +/// Default batch size to read parquet files. +pub(crate) const DEFAULT_READ_BATCH_SIZE: usize = 1024; +/// Default row group size for parquet files. +const DEFAULT_ROW_GROUP_SIZE: usize = 100 * DEFAULT_READ_BATCH_SIZE; /// Parquet write options. #[derive(Debug)] diff --git a/src/mito2/src/sst/parquet/reader.rs b/src/mito2/src/sst/parquet/reader.rs index e54391b15a4a..3eade74a4c62 100644 --- a/src/mito2/src/sst/parquet/reader.rs +++ b/src/mito2/src/sst/parquet/reader.rs @@ -46,7 +46,7 @@ use crate::read::{Batch, BatchReader}; use crate::sst::file::{FileHandle, FileId}; use crate::sst::parquet::format::ReadFormat; use crate::sst::parquet::stats::RowGroupPruningStats; -use crate::sst::parquet::PARQUET_METADATA_KEY; +use crate::sst::parquet::{DEFAULT_READ_BATCH_SIZE, PARQUET_METADATA_KEY}; /// Parquet SST reader builder. pub struct ParquetReaderBuilder { @@ -147,7 +147,8 @@ impl ParquetReaderBuilder { }; let mut builder = ParquetRecordBatchStreamBuilder::new(reader) .await - .context(ReadParquetSnafu { path: file_path })?; + .context(ReadParquetSnafu { path: file_path })? + .with_batch_size(DEFAULT_READ_BATCH_SIZE); // Decode region metadata. let key_value_meta = builder.metadata().file_metadata().key_value_metadata();