Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(inverted_index.search): add fst values mapper #2862

Merged
merged 12 commits into from
Dec 4, 2023
97 changes: 97 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ humantime-serde = "1.1"
itertools = "0.10"
lazy_static = "1.4"
meter-core = { git = "https://github.com/GreptimeTeam/greptime-meter.git", rev = "abbd357c1e193cd270ea65ee7652334a150b628f" }
mockall = "0.11.4"
moka = "0.12"
once_cell = "1.18"
opentelemetry-proto = { git = "https://github.com/waynexia/opentelemetry-rust.git", rev = "33841b38dda79b15f2024952be5f32533325ca02", features = [
Expand Down
1 change: 1 addition & 0 deletions src/index/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ common-macro.workspace = true
fst.workspace = true
futures.workspace = true
greptime-proto.workspace = true
mockall.workspace = true
prost.workspace = true
regex-automata.workspace = true
regex.workspace = true
Expand Down
1 change: 1 addition & 0 deletions src/index/src/inverted_index/format/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ use crate::inverted_index::error::Result;
use crate::inverted_index::FstMap;

/// InvertedIndexReader defines an asynchronous reader of inverted index data
#[mockall::automock]
#[async_trait]
pub trait InvertedIndexReader {
/// Retrieve metadata of all inverted indices stored within the blob.
Expand Down
1 change: 1 addition & 0 deletions src/index/src/inverted_index/search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,5 @@
// limitations under the License.

pub mod fst_apply;
pub mod fst_values_mapper;
pub mod predicate;
5 changes: 4 additions & 1 deletion src/index/src/inverted_index/search/fst_apply/keys_apply.rs
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,10 @@ mod tests {
},
})];
let result = KeysFstApplier::try_from(predicates);
assert!(result.is_err());
assert!(matches!(
result,
Err(Error::KeysApplierWithoutInList { .. })
));
}

#[test]
Expand Down
113 changes: 113 additions & 0 deletions src/index/src/inverted_index/search/fst_values_mapper.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use common_base::BitVec;
use greptime_proto::v1::index::InvertedIndexMeta;

use crate::inverted_index::error::Result;
use crate::inverted_index::format::reader::InvertedIndexReader;

/// `FstValuesMapper` maps FST-encoded `u64` values to their corresponding bitmaps
/// within an inverted index.
///
/// Each `u64` value packs two 32-bit fields: the upper 32 bits hold the bitmap's
/// relative offset and the lower 32 bits hold its size. The mapper decodes these
/// offset-size pairs, fetches each bitmap through the reader, and unions them
/// into a single `BitVec`.
pub struct FstValuesMapper<'a> {
/// `reader` fetches bitmap data given the offset and size decoded from an FST
/// value. It is borrowed mutably for the lifetime of the mapper.
reader: &'a mut dyn InvertedIndexReader,

/// `metadata` describes the inverted index being read; it is handed to the
/// reader on every bitmap request.
metadata: &'a InvertedIndexMeta,
}

impl<'a> FstValuesMapper<'a> {
    /// Creates a mapper that fetches bitmaps through `reader`, passing `metadata`
    /// along with every bitmap request.
    pub fn new(
        reader: &'a mut dyn InvertedIndexReader,
        metadata: &'a InvertedIndexMeta,
    ) -> FstValuesMapper<'a> {
        FstValuesMapper { reader, metadata }
    }

    /// Maps a slice of FST values to a single `BitVec` by fetching the bitmap
    /// each value points at and OR-ing all of them together.
    ///
    /// Returns an empty `BitVec` when `values` is empty.
    ///
    /// # Errors
    ///
    /// Returns the first error produced by the reader while fetching a bitmap;
    /// bitmaps for the remaining values are not fetched.
    pub async fn map_values(&mut self, values: &[u64]) -> Result<BitVec> {
        let mut bitmap = BitVec::new();

        for &value in values {
            // Each value packs `relative_offset` in the upper 32 bits and `size`
            // in the lower 32 bits; the truncating casts are intentional.
            let relative_offset = (value >> 32) as u32;
            let size = value as u32;

            let bm = self
                .reader
                .bitmap(self.metadata, relative_offset, size)
                .await?;

            // Ensure the longest BitVec is the left operand to prevent truncation during OR.
            if bm.len() > bitmap.len() {
                bitmap = bm | bitmap
            } else {
                bitmap |= bm
            }
        }

        Ok(bitmap)
    }
}

#[cfg(test)]
mod tests {
    use common_base::bit_vec::prelude::*;

    use super::*;
    use crate::inverted_index::format::reader::MockInvertedIndexReader;

    /// Packs `offset` into the upper 32 bits and `size` into the lower 32 bits,
    /// mirroring the layout `FstValuesMapper::map_values` decodes.
    fn value(offset: u32, size: u32) -> u64 {
        (u64::from(offset) << 32) | u64::from(size)
    }

    #[tokio::test]
    async fn test_map_values() {
        // The mock serves exactly two bitmaps, keyed by (offset, size); any other
        // request is a test bug.
        let mut mock_reader = MockInvertedIndexReader::new();
        mock_reader.expect_bitmap().returning(|_, offset, size| {
            let bits = match (offset, size) {
                (1, 1) => bitvec![u8, Lsb0; 1, 0, 1, 0, 1, 0, 1],
                (2, 1) => bitvec![u8, Lsb0; 0, 1, 0, 1, 0, 1, 0, 1],
                _ => unreachable!(),
            };
            Ok(bits)
        });

        let meta = InvertedIndexMeta::default();
        let mut values_mapper = FstValuesMapper::new(&mut mock_reader, &meta);

        let seven_bits = bitvec![u8, Lsb0; 1, 0, 1, 0, 1, 0, 1];
        let eight_bits = bitvec![u8, Lsb0; 0, 1, 0, 1, 0, 1, 0, 1];
        let all_ones = bitvec![u8, Lsb0; 1, 1, 1, 1, 1, 1, 1, 1];

        // No values: nothing is fetched and no bit is set.
        let nothing = values_mapper.map_values(&[]).await.unwrap();
        assert_eq!(nothing.count_ones(), 0);

        // A single value yields its bitmap unchanged.
        let only_first = values_mapper.map_values(&[value(1, 1)]).await.unwrap();
        assert_eq!(only_first, seven_bits);

        let only_second = values_mapper.map_values(&[value(2, 1)]).await.unwrap();
        assert_eq!(only_second, eight_bits);

        // The union must keep the longer length regardless of input order.
        let short_then_long = values_mapper
            .map_values(&[value(1, 1), value(2, 1)])
            .await
            .unwrap();
        assert_eq!(short_then_long, all_ones);

        let long_then_short = values_mapper
            .map_values(&[value(2, 1), value(1, 1)])
            .await
            .unwrap();
        assert_eq!(long_then_short, all_ones);
    }
}
Loading