-
Notifications
You must be signed in to change notification settings - Fork 332
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(inverted_index.search): add index applier (#2868)
* feat(inverted_index.search): add fst applier Signed-off-by: Zhenchi <[email protected]> * fix: typos Signed-off-by: Zhenchi <[email protected]> * feat(inverted_index.search): add fst values mapper Signed-off-by: Zhenchi <[email protected]> * chore: remove meta check Signed-off-by: Zhenchi <[email protected]> * fix: fmt & clippy Signed-off-by: Zhenchi <[email protected]> * refactor: one expect for test Signed-off-by: Zhenchi <[email protected]> * feat(inverted_index.search): add index applier Signed-off-by: Zhenchi <[email protected]> * refactor: bitmap_full -> bitmap_full_range Signed-off-by: Zhenchi <[email protected]> * feat: add check for segment_row_count Signed-off-by: Zhenchi <[email protected]> * fix: remove redundant code Signed-off-by: Zhenchi <[email protected]> * fix: reader test Signed-off-by: Zhenchi <[email protected]> * chore: match error in test Signed-off-by: Zhenchi <[email protected]> * fix: fmt Signed-off-by: Zhenchi <[email protected]> * refactor: add helper function to construct fst value Signed-off-by: Zhenchi <[email protected]> * refactor: polish unit tests Signed-off-by: Zhenchi <[email protected]> * refactor: bytemuck to extract offset and size Signed-off-by: Zhenchi <[email protected]> * fix: toml format Signed-off-by: Zhenchi <[email protected]> * refactor: use bytemuck Signed-off-by: Zhenchi <[email protected]> * refactor: reorg value in unit tests Signed-off-by: Zhenchi <[email protected]> * chore: update proto Signed-off-by: Zhenchi <[email protected]> * chore: add a TODO reminder to consider optimizing the order of apply Signed-off-by: Zhenchi <[email protected]> * refactor: InList predicates are applied first to benefit from higher selectivity Signed-off-by: Zhenchi <[email protected]> * chore: update proto Signed-off-by: Zhenchi <[email protected]> * feat: add read options to control the behavior of index not found Signed-off-by: Zhenchi <[email protected]> * refactor: polish Signed-off-by: Zhenchi <[email protected]> * 
refactor: move read options to implementation instead of trait Signed-off-by: Zhenchi <[email protected]> * feat: add SearchContext, refine doc comments Signed-off-by: Zhenchi <[email protected]> * feat: move index_not_found_strategy as a field of SearchContext Signed-off-by: Zhenchi <[email protected]> * chore: rename variant Signed-off-by: Zhenchi <[email protected]> --------- Signed-off-by: Zhenchi <[email protected]>
- Loading branch information
Showing
12 changed files
with
434 additions
and
12 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,4 +14,5 @@ | |
|
||
pub mod fst_apply; | ||
pub mod fst_values_mapper; | ||
pub mod index_apply; | ||
pub mod predicate; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
// Copyright 2023 Greptime Team | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
mod predicates_apply; | ||
|
||
use async_trait::async_trait; | ||
pub use predicates_apply::PredicatesIndexApplier; | ||
|
||
use crate::inverted_index::error::Result; | ||
use crate::inverted_index::format::reader::InvertedIndexReader; | ||
|
||
/// A trait for processing and transforming indices obtained from an inverted index. | ||
/// | ||
/// Applier instances are reusable and work with various `InvertedIndexReader` instances, | ||
/// avoiding repeated compilation of fixed predicates such as regex patterns. | ||
/// | ||
/// `apply` borrows the applier immutably (`&self`), so the same instance can be used for | ||
/// one reader after another. | ||
#[async_trait] | ||
pub trait IndexApplier { | ||
/// Applies the predefined predicates to the data read by the given index reader, returning | ||
/// a list of relevant indices (e.g., post IDs, group IDs, row IDs). | ||
/// | ||
/// # Arguments | ||
/// | ||
/// * `context` - per-search settings, taken by value; it carries the | ||
///   `index_not_found_strategy` that tells the applier what to do when an index is missing. | ||
/// * `reader` - the inverted index reader to read from, borrowed mutably for the call. | ||
/// | ||
/// # Errors | ||
/// | ||
/// Returns the inverted index module's `Result`; the concrete error conditions are decided | ||
/// by each implementation. | ||
/// NOTE(review): presumably `IndexNotFoundStrategy::ThrowError` surfaces here as an `Err` | ||
/// when the index is missing; confirm against the implementations. | ||
async fn apply( | ||
&self, | ||
context: SearchContext, | ||
reader: &mut dyn InvertedIndexReader, | ||
) -> Result<Vec<usize>>; | ||
} | ||
|
||
/// A context for searching the inverted index. | ||
/// | ||
/// It is handed by value to `IndexApplier::apply` for each search. The derived `Default` | ||
/// uses the default `IndexNotFoundStrategy`, which is `ReturnEmpty`. | ||
#[derive(Clone, Debug, Eq, PartialEq, Default)] | ||
pub struct SearchContext { | ||
/// `index_not_found_strategy` controls the behavior of the applier when the index is not found. | ||
pub index_not_found_strategy: IndexNotFoundStrategy, | ||
} | ||
|
||
/// Defines the behavior of an applier when the index is not found. | ||
/// | ||
/// The strategy is carried in `SearchContext::index_not_found_strategy`. `ReturnEmpty` is | ||
/// the `Default` value. | ||
#[derive(Clone, Copy, Debug, Eq, PartialEq, Default)] | ||
pub enum IndexNotFoundStrategy { | ||
/// Return an empty list of indices. | ||
/// | ||
/// This is the default strategy. | ||
#[default] | ||
ReturnEmpty, | ||
|
||
/// Ignore the index and continue. | ||
/// | ||
/// NOTE(review): presumably the predicates that need the missing index are skipped while | ||
/// the remaining ones are still applied; confirm against the applier implementation. | ||
Ignore, | ||
|
||
/// Throw an error. | ||
/// | ||
/// NOTE(review): presumably reported as an `Err` from `IndexApplier::apply` rather than | ||
/// a panic; confirm against the applier implementation. | ||
ThrowError, | ||
} |
Oops, something went wrong.