Skip to content

Commit

Permalink
feat: add filter metrics
Browse files Browse the repository at this point in the history
Signed-off-by: Zhenchi <[email protected]>
  • Loading branch information
zhongzc committed Dec 30, 2023
1 parent d5c38b3 commit 983c09c
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 13 deletions.
3 changes: 3 additions & 0 deletions src/mito2/src/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,9 @@ lazy_static! {
/// Counter of filtered rows during merge, labeled by [`TYPE_LABEL`].
pub static ref MERGE_FILTER_ROWS_TOTAL: IntCounterVec =
register_int_counter_vec!("mito_merge_filter_rows_total", "mito merge filter rows total", &[TYPE_LABEL]).unwrap();
/// Counter of row groups considered by readers, labeled by [`TYPE_LABEL`].
///
/// The parquet reader reports three label values when it is dropped:
/// - `unfiltered`: all row groups in the file, before any pruning;
/// - `inverted_index_filtered`: row groups remaining after index pruning;
/// - `min_max_filtered`: row groups remaining after pruning by metadata.
pub static ref READ_ROW_GROUPS_TOTAL: IntCounterVec =
register_int_counter_vec!("mito_read_row_groups_total", "mito read row groups total", &[TYPE_LABEL]).unwrap();
// ------- End of query metrics.

// Cache related metrics.
Expand Down
42 changes: 29 additions & 13 deletions src/mito2/src/sst/parquet/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ use crate::error::{
ArrowReaderSnafu, InvalidMetadataSnafu, InvalidParquetSnafu, OpenDalSnafu, ReadParquetSnafu,
Result,
};
use crate::metrics::{READ_ROWS_TOTAL, READ_STAGE_ELAPSED};
use crate::metrics::{READ_ROWS_TOTAL, READ_ROW_GROUPS_TOTAL, READ_STAGE_ELAPSED};
use crate::read::{Batch, BatchReader};
use crate::sst::file::FileHandle;
use crate::sst::index::applier::SstIndexApplierRef;
Expand Down Expand Up @@ -156,8 +156,12 @@ impl ParquetReaderBuilder {
parquet_to_arrow_field_levels(parquet_schema_desc, projection_mask.clone(), hint)
.context(ReadParquetSnafu { path: &file_path })?;

let mut metrics = Metrics::default();

// Computes row groups to read.
let row_groups = self.row_groups_to_read(&read_format, &parquet_meta).await;
let row_groups = self
.row_groups_to_read(&read_format, &parquet_meta, &mut metrics)
.await;

let reader_builder = RowGroupReaderBuilder {
file_handle: self.file_handle.clone(),
Expand All @@ -169,12 +173,7 @@ impl ParquetReaderBuilder {
cache_manager: self.cache_manager.clone(),
};

let metrics = Metrics {
read_row_groups: row_groups.len(),
build_cost: start.elapsed(),
..Default::default()
};

metrics.build_cost = start.elapsed();
Ok(ParquetReader {
row_groups,
read_format,
Expand Down Expand Up @@ -247,8 +246,10 @@ impl ParquetReaderBuilder {
&self,
read_format: &ReadFormat,
parquet_meta: &ParquetMetaData,
metrics: &mut Metrics,
) -> BTreeSet<usize> {
let mut row_group_ids = (0..parquet_meta.num_row_groups()).collect();
let mut row_group_ids: BTreeSet<_> = (0..parquet_meta.num_row_groups()).collect();
metrics.num_unfiltered_row_groups += row_group_ids.len();

// Applies index to prune row groups.
if let Some(index_applier) = &self.index_applier {
Expand All @@ -262,6 +263,7 @@ impl ParquetReaderBuilder {
}
}
}
metrics.num_inverted_index_filtered_row_groups += row_group_ids.len();

// Prunes row groups by metadata.
if let Some(predicate) = &self.predicate {
Expand All @@ -287,6 +289,7 @@ impl ParquetReaderBuilder {
row_group_ids.remove(&row_group_id);
}
};
metrics.num_min_max_filtered_row_groups += row_group_ids.len();

row_group_ids
}
Expand All @@ -295,8 +298,12 @@ impl ParquetReaderBuilder {
/// Parquet reader metrics.
#[derive(Debug, Default)]
struct Metrics {
/// Number of row groups to read.
read_row_groups: usize,
/// Number of row groups before any filtering is applied.
num_unfiltered_row_groups: usize,
/// Number of row groups to read after filtering by inverted index.
num_inverted_index_filtered_row_groups: usize,
/// Number of row groups to read after filtering by min-max index.
num_min_max_filtered_row_groups: usize,
/// Duration to build the parquet reader.
build_cost: Duration,
/// Duration to scan the reader.
Expand Down Expand Up @@ -424,8 +431,8 @@ impl Drop for ParquetReader {
self.reader_builder.file_handle.region_id(),
self.reader_builder.file_handle.file_id(),
self.reader_builder.file_handle.time_range(),
self.metrics.read_row_groups,
self.reader_builder.parquet_meta.num_row_groups(),
self.metrics.num_min_max_filtered_row_groups,
self.metrics.num_unfiltered_row_groups,
self.metrics
);

Expand All @@ -439,6 +446,15 @@ impl Drop for ParquetReader {
READ_ROWS_TOTAL
.with_label_values(&["parquet"])
.inc_by(self.metrics.num_rows as u64);
READ_ROW_GROUPS_TOTAL
.with_label_values(&["unfiltered"])
.inc_by(self.metrics.num_unfiltered_row_groups as u64);
READ_ROW_GROUPS_TOTAL
.with_label_values(&["inverted_index_filtered"])
.inc_by(self.metrics.num_inverted_index_filtered_row_groups as u64);
READ_ROW_GROUPS_TOTAL
.with_label_values(&["min_max_filtered"])
.inc_by(self.metrics.num_min_max_filtered_row_groups as u64);
}
}

Expand Down

0 comments on commit 983c09c

Please sign in to comment.