Skip to content

Commit

Permalink
feat(inverted_index.search): add index applier (#2868)
Browse files Browse the repository at this point in the history
* feat(inverted_index.search): add fst applier

Signed-off-by: Zhenchi <[email protected]>

* fix: typos

Signed-off-by: Zhenchi <[email protected]>

* feat(inverted_index.search): add fst values mapper

Signed-off-by: Zhenchi <[email protected]>

* chore: remove meta check

Signed-off-by: Zhenchi <[email protected]>

* fix: fmt & clippy

Signed-off-by: Zhenchi <[email protected]>

* refactor: one expect for test

Signed-off-by: Zhenchi <[email protected]>

* feat(inverted_index.search): add index applier

Signed-off-by: Zhenchi <[email protected]>

* refactor: bitmap_full -> bitmap_full_range

Signed-off-by: Zhenchi <[email protected]>

* feat: add check for segment_row_count

Signed-off-by: Zhenchi <[email protected]>

* fix: remove redundant code

Signed-off-by: Zhenchi <[email protected]>

* fix: reader test

Signed-off-by: Zhenchi <[email protected]>

* chore: match error in test

Signed-off-by: Zhenchi <[email protected]>

* fix: fmt

Signed-off-by: Zhenchi <[email protected]>

* refactor: add helper function to construct fst value

Signed-off-by: Zhenchi <[email protected]>

* refactor: polish unit tests

Signed-off-by: Zhenchi <[email protected]>

* refactor: bytemuck to extract offset and size

Signed-off-by: Zhenchi <[email protected]>

* fix: toml format

Signed-off-by: Zhenchi <[email protected]>

* refactor: use bytemuck

Signed-off-by: Zhenchi <[email protected]>

* refactor: reorg value in unit tests

Signed-off-by: Zhenchi <[email protected]>

* chore: update proto

Signed-off-by: Zhenchi <[email protected]>

* chore: add a TODO reminder to consider optimizing the order of apply

Signed-off-by: Zhenchi <[email protected]>

* refactor: InList predicates are applied first to benefit from higher selectivity

Signed-off-by: Zhenchi <[email protected]>

* chore: update proto

Signed-off-by: Zhenchi <[email protected]>

* feat: add read options to control the behavior of index not found

Signed-off-by: Zhenchi <[email protected]>

* refactor: polish

Signed-off-by: Zhenchi <[email protected]>

* refactor: move read options to implementation instead of trait

Signed-off-by: Zhenchi <[email protected]>

* feat: add SearchContext, refine doc comments

Signed-off-by: Zhenchi <[email protected]>

* feat: move index_not_found_strategy as a field of SearchContext

Signed-off-by: Zhenchi <[email protected]>

* chore: rename variant

Signed-off-by: Zhenchi <[email protected]>

---------

Signed-off-by: Zhenchi <[email protected]>
  • Loading branch information
zhongzc authored Dec 5, 2023
1 parent aa89d9d commit 0b421b5
Show file tree
Hide file tree
Showing 12 changed files with 434 additions and 12 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ etcd-client = "0.12"
fst = "0.4.7"
futures = "0.3"
futures-util = "0.3"
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "2aaee38de81047537dfa42af9df63bcfb866e06c" }
greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "b1d403088f02136bcebde53d604f491c260ca8e2" }
humantime-serde = "1.1"
itertools = "0.10"
lazy_static = "1.4"
Expand Down
10 changes: 9 additions & 1 deletion src/index/src/inverted_index/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@ pub enum Error {
payload_size: u64,
},

#[snafu(display("Unexpected zero segment row count"))]
UnexpectedZeroSegmentRowCount { location: Location },

#[snafu(display("Failed to decode fst"))]
DecodeFst {
#[snafu(source)]
Expand Down Expand Up @@ -109,6 +112,9 @@ pub enum Error {
location: Location,
predicates: Vec<Predicate>,
},

#[snafu(display("index not found, name: {name}"))]
IndexNotFound { name: String, location: Location },
}

impl ErrorExt for Error {
Expand All @@ -118,6 +124,7 @@ impl ErrorExt for Error {
Seek { .. }
| Read { .. }
| UnexpectedFooterPayloadSize { .. }
| UnexpectedZeroSegmentRowCount { .. }
| UnexpectedOffsetSize { .. }
| UnexpectedBlobSize { .. }
| DecodeProto { .. }
Expand All @@ -128,7 +135,8 @@ impl ErrorExt for Error {
| ParseDFA { .. }
| KeysApplierWithoutInList { .. }
| IntersectionApplierWithInList { .. }
| EmptyPredicates { .. } => StatusCode::InvalidArguments,
| EmptyPredicates { .. }
| IndexNotFound { .. } => StatusCode::InvalidArguments,
}
}

Expand Down
2 changes: 1 addition & 1 deletion src/index/src/inverted_index/format/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ use crate::inverted_index::FstMap;
/// InvertedIndexReader defines an asynchronous reader of inverted index data
#[mockall::automock]
#[async_trait]
pub trait InvertedIndexReader {
pub trait InvertedIndexReader: Send {
/// Retrieve metadata of all inverted indices stored within the blob.
async fn metadata(&mut self) -> Result<InvertedIndexMetas>;

Expand Down
6 changes: 5 additions & 1 deletion src/index/src/inverted_index/format/reader/blob.rs
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,11 @@ mod tests {
};

// metas
let mut metas = InvertedIndexMetas::default();
let mut metas = InvertedIndexMetas {
total_row_count: 10,
segment_row_count: 1,
..Default::default()
};
metas.metas.insert(meta.name.clone(), meta);
metas.metas.insert(meta1.name.clone(), meta1);
let mut meta_buf = Vec::new();
Expand Down
16 changes: 10 additions & 6 deletions src/index/src/inverted_index/format/reader/footer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ use snafu::{ensure, ResultExt};

use crate::inverted_index::error::{
DecodeProtoSnafu, ReadSnafu, Result, SeekSnafu, UnexpectedFooterPayloadSizeSnafu,
UnexpectedOffsetSizeSnafu,
UnexpectedOffsetSizeSnafu, UnexpectedZeroSegmentRowCountSnafu,
};
use crate::inverted_index::format::FOOTER_PAYLOAD_SIZE_SIZE;

Expand Down Expand Up @@ -85,6 +85,11 @@ impl<R: AsyncRead + AsyncSeek + Unpin> InvertedIndeFooterReader<R> {

/// Check if the read metadata is consistent with expected sizes and offsets.
fn validate_metas(&self, metas: &InvertedIndexMetas, payload_size: u64) -> Result<()> {
ensure!(
metas.segment_row_count > 0,
UnexpectedZeroSegmentRowCountSnafu
);

for meta in metas.metas.values() {
let InvertedIndexMeta {
base_offset,
Expand Down Expand Up @@ -116,7 +121,10 @@ mod tests {
use super::*;

fn create_test_payload(meta: InvertedIndexMeta) -> Vec<u8> {
let mut metas = InvertedIndexMetas::default();
let mut metas = InvertedIndexMetas {
segment_row_count: 1,
..Default::default()
};
metas.metas.insert("test".to_string(), meta);

let mut payload_buf = vec![];
Expand All @@ -131,7 +139,6 @@ mod tests {
async fn test_read_payload() {
let meta = InvertedIndexMeta {
name: "test".to_string(),
segment_row_count: 4096,
..Default::default()
};

Expand All @@ -145,14 +152,12 @@ mod tests {
assert_eq!(metas.metas.len(), 1);
let index_meta = &metas.metas.get("test").unwrap();
assert_eq!(index_meta.name, "test");
assert_eq!(index_meta.segment_row_count, 4096);
}

#[tokio::test]
async fn test_invalid_footer_payload_size() {
let meta = InvertedIndexMeta {
name: "test".to_string(),
segment_row_count: 4096,
..Default::default()
};

Expand All @@ -171,7 +176,6 @@ mod tests {
name: "test".to_string(),
base_offset: 0,
inverted_index_size: 1, // Set size to 1 to make it exceed the blob size
segment_row_count: 4096,
..Default::default()
};

Expand Down
1 change: 1 addition & 0 deletions src/index/src/inverted_index/search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,5 @@

pub mod fst_apply;
pub mod fst_values_mapper;
pub mod index_apply;
pub mod predicate;
1 change: 1 addition & 0 deletions src/index/src/inverted_index/search/fst_apply.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ use crate::inverted_index::FstMap;

/// A trait for objects that can process a finite state transducer (FstMap) and return
/// associated values.
#[mockall::automock]
pub trait FstApplier: Send + Sync {
/// Retrieves values from an FstMap.
///
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ impl KeysFstApplier {
fn get_list(p: &Predicate) -> &HashSet<Bytes> {
match p {
Predicate::InList(i) => &i.list,
_ => unreachable!(), // `in_lists` is filtered by `split_at_in_lists
_ => unreachable!(), // `in_lists` is filtered by `split_at_in_lists`
}
}

Expand Down
57 changes: 57 additions & 0 deletions src/index/src/inverted_index/search/index_apply.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

mod predicates_apply;

use async_trait::async_trait;
pub use predicates_apply::PredicatesIndexApplier;

use crate::inverted_index::error::Result;
use crate::inverted_index::format::reader::InvertedIndexReader;

/// A trait for processing and transforming indices obtained from an inverted index.
///
/// Applier instances are reusable and work with various `InvertedIndexReader` instances,
/// avoiding repeated compilation of fixed predicates such as regex patterns.
/// `apply` takes `&self`, so a call does not require exclusive access to the applier.
#[async_trait]
pub trait IndexApplier {
    /// Applies the predefined predicates to the data read by the given index reader, returning
    /// a list of relevant indices (e.g., post IDs, group IDs, row IDs).
    ///
    /// # Arguments
    ///
    /// * `context` - Per-search settings, taken by value. It carries the
    ///   `index_not_found_strategy` that tells the applier what to do when an index is
    ///   not found.
    /// * `reader` - The inverted index reader to read from, borrowed mutably for the
    ///   duration of the call.
    ///
    /// # Errors
    ///
    /// Returns the inverted index module's `Result`. The exact failure conditions are
    /// defined by each implementation.
    /// NOTE(review): presumably read/decode failures from `reader`, and a missing index
    /// when the strategy is `ThrowError` — confirm against the implementations.
    async fn apply(
        &self,
        context: SearchContext,
        reader: &mut dyn InvertedIndexReader,
    ) -> Result<Vec<usize>>;
}

/// Per-search settings handed to an index applier together with the index reader.
///
/// The derived `Default` yields a context whose fields all take their own defaults.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct SearchContext {
    /// What the applier should do when the index is not found.
    pub index_not_found_strategy: IndexNotFoundStrategy,
}

/// Selects how an applier reacts when the index is not found.
///
/// The default strategy is [`IndexNotFoundStrategy::ReturnEmpty`].
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub enum IndexNotFoundStrategy {
    /// Return an empty list of indices (the default).
    #[default]
    ReturnEmpty,

    /// Ignore the index and continue.
    Ignore,

    /// Return an error.
    ThrowError,
}
Loading

0 comments on commit 0b421b5

Please sign in to comment.