Skip to content

Commit

Permalink
feat(parquet): introduce inverted index applier to reader (GreptimeTeam#3130)
Browse files Browse the repository at this point in the history

* feat(parquet): introduce inverted index applier to reader

Signed-off-by: Zhenchi <[email protected]>

* feat: purger removes index file

Signed-off-by: Zhenchi <[email protected]>

* fix test

Signed-off-by: Zhenchi <[email protected]>

* chore: add TODO for escape route

Signed-off-by: Zhenchi <[email protected]>

* chore: add TODO for escape route

Signed-off-by: Zhenchi <[email protected]>

* Update src/mito2/src/access_layer.rs

Co-authored-by: dennis zhuang <[email protected]>

* Update src/mito2/src/sst/parquet/reader.rs

Co-authored-by: dennis zhuang <[email protected]>

* feat: min-max index to prune row groups filtered by inverted index

Signed-off-by: Zhenchi <[email protected]>

* feat: file_meta.inverted_index_available -> file_meta.available_indexes

Signed-off-by: Zhenchi <[email protected]>

* chore: add TODO for leveraging WriteCache

Signed-off-by: Zhenchi <[email protected]>

* fix fmt

Signed-off-by: Zhenchi <[email protected]>

* fix: misset available indexes

Signed-off-by: Zhenchi <[email protected]>

* feat: add index file size

Signed-off-by: Zhenchi <[email protected]>

* refactor: use smallvec to reduce heap allocation

Signed-off-by: Zhenchi <[email protected]>

* fix: add index size to disk usage

Signed-off-by: Zhenchi <[email protected]>

---------

Signed-off-by: Zhenchi <[email protected]>
Co-authored-by: dennis zhuang <[email protected]>
  • Loading branch information
zhongzc and killme2008 authored Jan 11, 2024
1 parent 312e8e8 commit fd8fb64
Show file tree
Hide file tree
Showing 25 changed files with 315 additions and 90 deletions.
3 changes: 3 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ rskafka = "0.5"
rust_decimal = "1.33"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
smallvec = "1"
smallvec = { version = "1", features = ["serde"] }
snafu = "0.7"
# on branch v0.38.x
sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "6a93567ae38d42be5c8d08b13c8ff4dde26502ef", features = [
Expand Down
2 changes: 1 addition & 1 deletion src/index/src/inverted_index/search/index_apply.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ use crate::inverted_index::format::reader::InvertedIndexReader;
/// avoiding repeated compilation of fixed predicates such as regex patterns.
#[mockall::automock]
#[async_trait]
pub trait IndexApplier {
pub trait IndexApplier: Send + Sync {
/// Applies the predefined predicates to the data read by the given index reader, returning
/// a list of relevant indices (e.g., post IDs, group IDs, row IDs).
async fn apply<'a>(
Expand Down
26 changes: 20 additions & 6 deletions src/mito2/src/access_layer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@ use store_api::metadata::RegionMetadataRef;

use crate::cache::write_cache::SstUploadRequest;
use crate::cache::CacheManagerRef;
use crate::error::{CleanDirSnafu, DeleteSstSnafu, OpenDalSnafu, Result};
use crate::error::{CleanDirSnafu, DeleteIndexSnafu, DeleteSstSnafu, OpenDalSnafu, Result};
use crate::read::Source;
use crate::sst::file::{FileHandle, FileId};
use crate::sst::file::{FileHandle, FileId, FileMeta};
use crate::sst::location;
use crate::sst::parquet::reader::ParquetReaderBuilder;
use crate::sst::parquet::writer::ParquetWriter;
Expand Down Expand Up @@ -66,13 +66,27 @@ impl AccessLayer {
&self.object_store
}

/// Deletes a SST file with given file id.
pub(crate) async fn delete_sst(&self, file_id: FileId) -> Result<()> {
let path = location::sst_file_path(&self.region_dir, file_id);
/// Deletes a SST file (and its index file if it has one) with given file id.
///
/// The SST data file is removed first; if the file's metadata reports an
/// inverted index, the companion index file is removed as well.
pub(crate) async fn delete_sst(&self, file_meta: &FileMeta) -> Result<()> {
    let sst_path = location::sst_file_path(&self.region_dir, file_meta.file_id);
    self.object_store
        .delete(&sst_path)
        .await
        .context(DeleteSstSnafu {
            file_id: file_meta.file_id,
        })?;

    // No companion index file — nothing left to clean up.
    if !file_meta.inverted_index_available() {
        return Ok(());
    }

    let index_path = location::index_file_path(&self.region_dir, file_meta.file_id);
    self.object_store
        .delete(&index_path)
        .await
        .context(DeleteIndexSnafu {
            file_id: file_meta.file_id,
        })
}

/// Returns a reader builder for specific `file`.
Expand Down
2 changes: 2 additions & 0 deletions src/mito2/src/compaction/test_util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ pub fn new_file_handle(
),
level,
file_size: 0,
available_indexes: Default::default(),
index_file_size: 0,
},
file_purger,
)
Expand Down
8 changes: 7 additions & 1 deletion src/mito2/src/compaction/twcs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ use common_telemetry::{debug, error, info};
use common_time::timestamp::TimeUnit;
use common_time::timestamp_millis::BucketAligned;
use common_time::Timestamp;
use smallvec::SmallVec;
use snafu::ResultExt;
use store_api::metadata::RegionMetadataRef;
use store_api::storage::RegionId;
Expand All @@ -39,7 +40,7 @@ use crate::read::{BoxedBatchReader, Source};
use crate::request::{
BackgroundNotify, CompactionFailed, CompactionFinished, OutputTx, WorkerRequest,
};
use crate::sst::file::{FileHandle, FileId, FileMeta, Level};
use crate::sst::file::{FileHandle, FileId, FileMeta, IndexType, Level};
use crate::sst::file_purger::FilePurgerRef;
use crate::sst::parquet::WriteOptions;
use crate::sst::version::LevelMeta;
Expand Down Expand Up @@ -330,6 +331,11 @@ impl TwcsCompactionTask {
time_range: sst_info.time_range,
level: output.output_level,
file_size: sst_info.file_size,
available_indexes: sst_info
.inverted_index_available
.then(|| SmallVec::from_iter([IndexType::InvertedIndex]))
.unwrap_or_default(),
index_file_size: sst_info.index_file_size,
});
Ok(file_meta_opt)
});
Expand Down
2 changes: 1 addition & 1 deletion src/mito2/src/engine/basic_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -553,5 +553,5 @@ async fn test_region_usage() {
assert_eq!(region_stat.sst_usage, 2742);

// region total usage
assert_eq!(region_stat.disk_usage(), 3748);
assert_eq!(region_stat.disk_usage(), 3791);
}
10 changes: 9 additions & 1 deletion src/mito2/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,14 @@ pub enum Error {
location: Location,
},

#[snafu(display("Failed to delete index file, file id: {}", file_id))]
DeleteIndex {
file_id: FileId,
#[snafu(source)]
error: object_store::Error,
location: Location,
},

#[snafu(display("Failed to flush region {}", region_id))]
FlushRegion {
region_id: RegionId,
Expand Down Expand Up @@ -596,7 +604,7 @@ impl ErrorExt for Error {
InvalidSender { .. } => StatusCode::InvalidArguments,
InvalidSchedulerState { .. } => StatusCode::InvalidArguments,
StopScheduler { .. } => StatusCode::Internal,
DeleteSst { .. } => StatusCode::StorageUnavailable,
DeleteSst { .. } | DeleteIndex { .. } => StatusCode::StorageUnavailable,
FlushRegion { source, .. } => source.status_code(),
RegionDropped { .. } => StatusCode::Cancelled,
RegionClosed { .. } => StatusCode::Cancelled,
Expand Down
8 changes: 7 additions & 1 deletion src/mito2/src/flush.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;

use common_telemetry::{error, info};
use smallvec::SmallVec;
use snafu::ResultExt;
use store_api::storage::RegionId;
use strum::IntoStaticStr;
Expand All @@ -39,7 +40,7 @@ use crate::request::{
SenderWriteRequest, WorkerRequest,
};
use crate::schedule::scheduler::{Job, SchedulerRef};
use crate::sst::file::{FileId, FileMeta};
use crate::sst::file::{FileId, FileMeta, IndexType};
use crate::sst::file_purger::FilePurgerRef;
use crate::sst::parquet::WriteOptions;
use crate::worker::WorkerListener;
Expand Down Expand Up @@ -339,6 +340,11 @@ impl RegionFlushTask {
time_range: sst_info.time_range,
level: 0,
file_size: sst_info.file_size,
available_indexes: sst_info
.inverted_index_available
.then(|| SmallVec::from_iter([IndexType::InvertedIndex]))
.unwrap_or_default(),
index_file_size: sst_info.index_file_size,
};
file_metas.push(file_meta);
}
Expand Down
2 changes: 2 additions & 0 deletions src/mito2/src/manifest/tests/checkpoint.rs
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,8 @@ async fn checkpoint_with_different_compression_types() {
time_range: (0.into(), 10000000.into()),
level: 0,
file_size: 1024000,
available_indexes: Default::default(),
index_file_size: 0,
};
let action = RegionMetaActionList::new(vec![RegionMetaAction::Edit(RegionEdit {
files_to_add: vec![file_meta],
Expand Down
3 changes: 3 additions & 0 deletions src/mito2/src/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,9 @@ lazy_static! {
/// Counter of filtered rows during merge.
pub static ref MERGE_FILTER_ROWS_TOTAL: IntCounterVec =
register_int_counter_vec!("greptime_mito_merge_filter_rows_total", "mito merge filter rows total", &[TYPE_LABEL]).unwrap();
/// Counter of row groups read.
pub static ref READ_ROW_GROUPS_TOTAL: IntCounterVec =
register_int_counter_vec!("greptime_mito_read_row_groups_total", "mito read row groups total", &[TYPE_LABEL]).unwrap();
// ------- End of query metrics.

// Cache related metrics.
Expand Down
22 changes: 21 additions & 1 deletion src/mito2/src/read/scan_region.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,10 @@

//! Scans a region according to the scan request.
use std::sync::Arc;

use common_recordbatch::SendableRecordBatchStream;
use common_telemetry::debug;
use common_telemetry::{debug, warn};
use common_time::range::TimestampRange;
use store_api::storage::ScanRequest;
use table::predicate::{Predicate, TimeRangePredicateBuilder};
Expand All @@ -27,6 +29,8 @@ use crate::read::projection::ProjectionMapper;
use crate::read::seq_scan::SeqScan;
use crate::region::version::VersionRef;
use crate::sst::file::FileHandle;
use crate::sst::index::applier::builder::SstIndexApplierBuilder;
use crate::sst::index::applier::SstIndexApplierRef;

/// A scanner scans a region and returns a [SendableRecordBatchStream].
pub(crate) enum Scanner {
Expand Down Expand Up @@ -194,6 +198,7 @@ impl ScanRegion {
total_ssts
);

let index_applier = self.build_index_applier();
let predicate = Predicate::new(self.request.filters.clone());
// The mapper always computes projected column ids as the schema of SSTs may change.
let mapper = match &self.request.projection {
Expand All @@ -207,6 +212,7 @@ impl ScanRegion {
.with_memtables(memtables)
.with_files(files)
.with_cache(self.cache_manager)
.with_index_applier(index_applier)
.with_parallelism(self.parallelism);

Ok(seq_scan)
Expand All @@ -224,6 +230,20 @@ impl ScanRegion {
TimeRangePredicateBuilder::new(&time_index.column_schema.name, unit, &self.request.filters)
.build()
}

/// Use the latest schema to build the index applier.
///
/// Returns `None` when no applicable predicates exist or when building the
/// applier fails (the failure is logged and the scan proceeds without the
/// index).
fn build_index_applier(&self) -> Option<SstIndexApplierRef> {
    let builder = SstIndexApplierBuilder::new(
        self.access_layer.region_dir().to_string(),
        self.access_layer.object_store().clone(),
        self.version.metadata.as_ref(),
    );
    match builder.build(&self.request.filters) {
        Ok(applier) => applier.map(Arc::new),
        Err(err) => {
            warn!(err; "Failed to build index applier");
            None
        }
    }
}
}

/// Config for parallel scan.
Expand Down
11 changes: 11 additions & 0 deletions src/mito2/src/read/seq_scan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ use crate::read::projection::ProjectionMapper;
use crate::read::scan_region::ScanParallism;
use crate::read::{BatchReader, BoxedBatchReader, BoxedBatchStream, Source};
use crate::sst::file::FileHandle;
use crate::sst::index::applier::SstIndexApplierRef;

/// Scans a region and returns rows in a sorted sequence.
///
Expand All @@ -62,6 +63,8 @@ pub struct SeqScan {
ignore_file_not_found: bool,
/// Parallelism to scan data.
parallelism: ScanParallism,
/// Index applier.
index_applier: Option<SstIndexApplierRef>,
}

impl SeqScan {
Expand All @@ -78,6 +81,7 @@ impl SeqScan {
cache_manager: None,
ignore_file_not_found: false,
parallelism: ScanParallism::default(),
index_applier: None,
}
}

Expand Down Expand Up @@ -130,6 +134,13 @@ impl SeqScan {
self
}

/// Sets index applier.
#[must_use]
pub(crate) fn with_index_applier(mut self, index_applier: Option<SstIndexApplierRef>) -> Self {
self.index_applier = index_applier;
self
}

/// Builds a stream for the query.
pub async fn build_stream(&self) -> Result<SendableRecordBatchStream> {
let start = Instant::now();
Expand Down
9 changes: 3 additions & 6 deletions src/mito2/src/request.rs
Original file line number Diff line number Diff line change
Expand Up @@ -651,8 +651,7 @@ impl OnFailure for FlushFinished {
// Clean flushed files.
for file in &self.file_metas {
self.file_purger.send_request(PurgeRequest {
region_id: file.region_id,
file_id: file.file_id,
file_meta: file.clone(),
});
}
}
Expand Down Expand Up @@ -707,14 +706,12 @@ impl OnFailure for CompactionFinished {
}));
}
for file in &self.compacted_files {
let file_id = file.file_id;
warn!(
"Cleaning region {} compaction output file: {}",
self.region_id, file_id
self.region_id, file.file_id
);
self.file_purger.send_request(PurgeRequest {
region_id: self.region_id,
file_id,
file_meta: file.clone(),
});
}
}
Expand Down
26 changes: 23 additions & 3 deletions src/mito2/src/sst/file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ use std::sync::Arc;

use common_time::Timestamp;
use serde::{Deserialize, Serialize};
use smallvec::SmallVec;
use snafu::{ResultExt, Snafu};
use store_api::storage::RegionId;
use uuid::Uuid;
Expand Down Expand Up @@ -95,6 +96,23 @@ pub struct FileMeta {
pub level: Level,
/// Size of the file.
pub file_size: u64,
/// Available indexes of the file.
pub available_indexes: SmallVec<[IndexType; 4]>,
/// Size of the index file.
pub index_file_size: u64,
}

/// Type of index carried alongside a SST file.
///
/// Stored (serialized) in `FileMeta::available_indexes`, so variants must
/// remain backward-compatible once persisted in the manifest.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum IndexType {
    /// Inverted index.
    InvertedIndex,
}

impl FileMeta {
    /// Returns whether this file has an inverted index available.
    pub fn inverted_index_available(&self) -> bool {
        self.available_indexes
            .iter()
            .any(|index| *index == IndexType::InvertedIndex)
    }
}

/// Handle to a SST file.
Expand Down Expand Up @@ -176,8 +194,7 @@ impl Drop for FileHandleInner {
fn drop(&mut self) {
if self.deleted.load(Ordering::Relaxed) {
self.file_purger.send_request(PurgeRequest {
region_id: self.meta.region_id,
file_id: self.meta.file_id,
file_meta: self.meta.clone(),
});
}
}
Expand Down Expand Up @@ -236,6 +253,8 @@ mod tests {
time_range: FileTimeRange::default(),
level,
file_size: 0,
available_indexes: SmallVec::from_iter([IndexType::InvertedIndex]),
index_file_size: 0,
}
}

Expand All @@ -250,7 +269,8 @@ mod tests {
#[test]
fn test_deserialize_from_string() {
let json_file_meta = "{\"region_id\":0,\"file_id\":\"bc5896ec-e4d8-4017-a80d-f2de73188d55\",\
\"time_range\":[{\"value\":0,\"unit\":\"Millisecond\"},{\"value\":0,\"unit\":\"Millisecond\"}],\"level\":0}";
\"time_range\":[{\"value\":0,\"unit\":\"Millisecond\"},{\"value\":0,\"unit\":\"Millisecond\"}],\
\"available_indexes\":[\"InvertedIndex\"],\"level\":0}";
let file_meta = create_file_meta(
FileId::from_str("bc5896ec-e4d8-4017-a80d-f2de73188d55").unwrap(),
0,
Expand Down
Loading

0 comments on commit fd8fb64

Please sign in to comment.