Skip to content

Commit

Permalink
Moving FileSlice to tantivy-common (quickwit-oss#1729)
Browse files Browse the repository at this point in the history
  • Loading branch information
fulmicoton authored Dec 21, 2022
1 parent 32cb1d2 commit f39165e
Show file tree
Hide file tree
Showing 27 changed files with 89 additions and 56 deletions.
4 changes: 1 addition & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ fs2 = { version = "0.4.3", optional = true }
levenshtein_automata = "0.2.1"
uuid = { version = "1.0.0", features = ["v4", "serde"] }
crossbeam-channel = "0.5.4"
stable_deref_trait = "1.2.0"
rust-stemmers = "1.2.0"
downcast-rs = "1.2.0"
bitpacking = { version = "0.8.4", default-features = false, features = ["bitpacker4x"] }
Expand All @@ -60,9 +59,8 @@ sstable = { version="0.1", path="./sstable", package ="tantivy-sstable", optiona
stacker = { version="0.1", path="./stacker", package ="tantivy-stacker" }
tantivy-query-grammar = { version= "0.19.0", path="./query-grammar" }
tantivy-bitpacker = { version= "0.3", path="./bitpacker" }
common = { version= "0.4", path = "./common/", package = "tantivy-common" }
common = { version= "0.5", path = "./common/", package = "tantivy-common" }
fastfield_codecs = { version= "0.3", path="./fastfield_codecs", default-features = false }
ownedbytes = { version= "0.4", path="./ownedbytes" }

[target.'cfg(windows)'.dependencies]
winapi = "0.3.9"
Expand Down
5 changes: 3 additions & 2 deletions common/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "tantivy-common"
version = "0.4.0"
version = "0.5.0"
authors = ["Paul Masurel <[email protected]>", "Pascal Seitz <[email protected]>"]
license = "MIT"
edition = "2021"
Expand All @@ -14,7 +14,8 @@ repository = "https://github.com/quickwit-oss/tantivy"

[dependencies]
byteorder = "1.4.3"
ownedbytes = { version= "0.4", path="../ownedbytes" }
ownedbytes = { version= "0.5", path="../ownedbytes" }
async-trait = "0.1"

[dev-dependencies]
proptest = "1.0.0"
Expand Down
68 changes: 57 additions & 11 deletions src/directory/file_slice.rs → common/src/file_slice.rs
Original file line number Diff line number Diff line change
@@ -1,19 +1,18 @@
use std::ops::{Deref, Range};
use std::ops::{Deref, Range, RangeBounds};
use std::sync::Arc;
use std::{fmt, io};

use async_trait::async_trait;
use common::HasLen;
use stable_deref_trait::StableDeref;
use ownedbytes::{OwnedBytes, StableDeref};

use crate::directory::OwnedBytes;
use crate::HasLen;

/// Objects that represent file sections in tantivy.
///
/// By contract, whatever happens to the directory file, as long as a FileHandle
/// is alive, the data associated with it cannot be altered or destroyed.
///
/// The underlying behavior is therefore specific to the [`Directory`](crate::Directory) that
/// The underlying behavior is therefore specific to the `Directory` that
/// created it. Despite its name, a [`FileSlice`] may or may not directly map to an actual file
/// on the filesystem.
Expand Down Expand Up @@ -68,6 +67,34 @@ impl fmt::Debug for FileSlice {
}
}

/// Resolves `rel_range`, expressed relative to the start of `orig_range`,
/// into an absolute half-open `Range`.
///
/// Both bounds of `rel_range` are offsets counted from `orig_range.start`.
/// An unbounded start maps to `orig_range.start` and an unbounded end maps
/// to `orig_range.end`.
///
/// For instance, `combine_ranges(2..11, 5..=7)` returns `7..10`: the
/// sub-range starts at offset 5 of `2..11` (absolute position 7) and ends
/// at offset 7 included (absolute position 9), which is `7..10` once
/// written as a half-open range.
///
/// # Panics
///
/// Panics if the resulting range is not contained in `orig_range`, if its
/// end would be smaller than its start, or if computing a bound overflows
/// `usize`.
fn combine_ranges<R: RangeBounds<usize>>(orig_range: Range<usize>, rel_range: R) -> Range<usize> {
    // Checked arithmetic is used throughout: in release builds a wrapping
    // addition could otherwise produce a small value that slips past the
    // bound assertions below and yields a bogus range.
    let rel_start: usize = match rel_range.start_bound() {
        std::ops::Bound::Included(&rel_start) => rel_start,
        std::ops::Bound::Excluded(&rel_start) => rel_start
            .checked_add(1)
            .expect("relative range start overflows usize"),
        std::ops::Bound::Unbounded => 0,
    };
    let start: usize = orig_range
        .start
        .checked_add(rel_start)
        .expect("absolute range start overflows usize");
    assert!(start <= orig_range.end);
    let end: usize = match rel_range.end_bound() {
        std::ops::Bound::Included(&rel_end) => orig_range
            .start
            .checked_add(rel_end)
            .and_then(|abs_end| abs_end.checked_add(1))
            .expect("absolute range end overflows usize"),
        std::ops::Bound::Excluded(&rel_end) => orig_range
            .start
            .checked_add(rel_end)
            .expect("absolute range end overflows usize"),
        std::ops::Bound::Unbounded => orig_range.end,
    };
    assert!(end >= start);
    assert!(end <= orig_range.end);
    start..end
}

impl FileSlice {
/// Wraps a FileHandle.
pub fn new(file_handle: Arc<dyn FileHandle>) -> Self {
Expand All @@ -91,11 +118,11 @@ impl FileSlice {
///
/// Panics if `byte_range.end` exceeds the filesize.
#[must_use]
pub fn slice(&self, byte_range: Range<usize>) -> FileSlice {
assert!(byte_range.end <= self.len());
#[inline]
pub fn slice<R: RangeBounds<usize>>(&self, byte_range: R) -> FileSlice {
FileSlice {
data: self.data.clone(),
range: self.range.start + byte_range.start..self.range.start + byte_range.end,
range: combine_ranges(self.range.clone(), byte_range),
}
}

Expand Down Expand Up @@ -134,7 +161,6 @@ impl FileSlice {
.read_bytes(self.range.start + range.start..self.range.start + range.end)
}

#[cfg(feature = "quickwit")]
#[doc(hidden)]
pub async fn read_bytes_slice_async(&self, byte_range: Range<usize>) -> io::Result<OwnedBytes> {
assert!(
Expand Down Expand Up @@ -225,11 +251,12 @@ impl FileHandle for OwnedBytes {
#[cfg(test)]
mod tests {
use std::io;
use std::ops::Bound;
use std::sync::Arc;

use common::HasLen;

use super::{FileHandle, FileSlice};
use crate::file_slice::combine_ranges;
use crate::HasLen;

#[test]
fn test_file_slice() -> io::Result<()> {
Expand Down Expand Up @@ -300,4 +327,23 @@ mod tests {
b"bcd"
);
}

#[test]
fn test_combine_range() {
    // Half-open relative range.
    let resolved = combine_ranges(1..3, 0..1);
    assert_eq!(resolved, 1..2);

    // Open-ended relative range: the end falls back to the original end.
    let resolved = combine_ranges(1..3, 1..);
    assert_eq!(resolved, 2..3);

    // Relative range without a start: the start falls back to the original start.
    let resolved = combine_ranges(1..4, ..2);
    assert_eq!(resolved, 1..3);

    // Both bounds are shifted by the original start.
    let resolved = combine_ranges(3..10, 2..5);
    assert_eq!(resolved, 5..8);

    // Inclusive end: the resulting half-open end is one past it.
    let resolved = combine_ranges(2..11, 5..=7);
    assert_eq!(resolved, 7..10);

    // Excluded start: the resulting start is one past it.
    let rel_bounds = (Bound::Excluded(5), Bound::Unbounded);
    let resolved = combine_ranges(2..11, rel_bounds);
    assert_eq!(resolved, 8..11);
}

#[test]
#[should_panic]
fn test_combine_range_panics() {
    // The relative end 4 maps to absolute position 7, which lies past the
    // end (5) of the original range, so the call must panic.
    let parent = 3..5;
    let relative = 1..4;
    let _ = combine_ranges(parent, relative);
}
}
3 changes: 2 additions & 1 deletion common/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,12 @@ use std::ops::Deref;
pub use byteorder::LittleEndian as Endianness;

mod bitset;
pub mod file_slice;
mod serialize;
mod vint;
mod writer;

pub use bitset::*;
pub use ownedbytes::{OwnedBytes, StableDeref};
pub use serialize::{BinarySerializable, DeserializeFrom, FixedSize};
pub use vint::{
deserialize_vint_u128, read_u32_vint, read_u32_vint_no_advance, serialize_vint_u128,
Expand Down
3 changes: 1 addition & 2 deletions fastfield_codecs/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,8 @@ repository = "https://github.com/quickwit-oss/tantivy"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
common = { version = "0.4", path = "../common/", package = "tantivy-common" }
common = { version = "0.5", path = "../common/", package = "tantivy-common" }
tantivy-bitpacker = { version= "0.3", path = "../bitpacker/" }
ownedbytes = { version = "0.4.0", path = "../ownedbytes" }
prettytable-rs = {version="0.9.0", optional= true}
rand = {version="0.8.3", optional= true}
fastdivide = "0.4"
Expand Down
2 changes: 1 addition & 1 deletion fastfield_codecs/benches/bench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ mod tests {
use std::iter;
use std::sync::Arc;

use common::OwnedBytes;
use fastfield_codecs::*;
use ownedbytes::OwnedBytes;
use rand::prelude::*;
use test::Bencher;

Expand Down
2 changes: 1 addition & 1 deletion fastfield_codecs/src/bitpacked.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use std::io::{self, Write};

use ownedbytes::OwnedBytes;
use common::OwnedBytes;
use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker};

use crate::serialize::NormalizedHeader;
Expand Down
5 changes: 2 additions & 3 deletions fastfield_codecs/src/blockwise_linear.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
use std::sync::Arc;
use std::{io, iter};

use common::{BinarySerializable, CountingWriter, DeserializeFrom};
use ownedbytes::OwnedBytes;
use common::{BinarySerializable, CountingWriter, DeserializeFrom, OwnedBytes};
use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker};

use crate::line::Line;
Expand Down Expand Up @@ -47,7 +46,7 @@ impl FastFieldCodec for BlockwiseLinearCodec {
type Reader = BlockwiseLinearReader;

fn open_from_bytes(
bytes: ownedbytes::OwnedBytes,
bytes: common::OwnedBytes,
normalized_header: NormalizedHeader,
) -> io::Result<Self::Reader> {
let footer_len: u32 = (&bytes[bytes.len() - 4..]).deserialize()?;
Expand Down
3 changes: 1 addition & 2 deletions fastfield_codecs/src/compact_space/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,7 @@ use std::{
ops::{Range, RangeInclusive},
};

use common::{BinarySerializable, CountingWriter, VInt, VIntU128};
use ownedbytes::OwnedBytes;
use common::{BinarySerializable, CountingWriter, OwnedBytes, VInt, VIntU128};
use tantivy_bitpacker::{self, BitPacker, BitUnpacker};

use crate::compact_space::build_compact_space::get_compact_space;
Expand Down
3 changes: 1 addition & 2 deletions fastfield_codecs/src/format_version.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
use std::io;

use common::BinarySerializable;
use ownedbytes::OwnedBytes;
use common::{BinarySerializable, OwnedBytes};

const MAGIC_NUMBER: u16 = 4335u16;
const FASTFIELD_FORMAT_VERSION: u8 = 1;
Expand Down
2 changes: 1 addition & 1 deletion fastfield_codecs/src/gcd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ mod tests {
use std::io;
use std::num::NonZeroU64;

use ownedbytes::OwnedBytes;
use common::OwnedBytes;

use crate::gcd::{compute_gcd, find_gcd};
use crate::{FastFieldCodecType, VecColumn};
Expand Down
5 changes: 2 additions & 3 deletions fastfield_codecs/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,14 @@ use std::io;
use std::io::Write;
use std::sync::Arc;

use common::BinarySerializable;
use common::{BinarySerializable, OwnedBytes};
use compact_space::CompactSpaceDecompressor;
use format_version::read_format_version;
use monotonic_mapping::{
StrictlyMonotonicMappingInverter, StrictlyMonotonicMappingToInternal,
StrictlyMonotonicMappingToInternalBaseval, StrictlyMonotonicMappingToInternalGCDBaseval,
};
use null_index_footer::read_null_index_footer;
use ownedbytes::OwnedBytes;
use serialize::{Header, U128Header};

mod bitpacked;
Expand Down Expand Up @@ -436,7 +435,7 @@ mod tests {
mod bench {
use std::sync::Arc;

use ownedbytes::OwnedBytes;
use common::OwnedBytes;
use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};
use test::{self, Bencher};
Expand Down
3 changes: 1 addition & 2 deletions fastfield_codecs/src/linear.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
use std::io::{self, Write};

use common::BinarySerializable;
use ownedbytes::OwnedBytes;
use common::{BinarySerializable, OwnedBytes};
use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker};

use crate::line::Line;
Expand Down
2 changes: 1 addition & 1 deletion fastfield_codecs/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@ use std::io::BufRead;
use std::net::{IpAddr, Ipv6Addr};
use std::str::FromStr;

use common::OwnedBytes;
use fastfield_codecs::{open_u128, serialize_u128, Column, FastFieldCodecType, VecColumn};
use itertools::Itertools;
use measure_time::print_time;
use ownedbytes::OwnedBytes;
use prettytable::{Cell, Row, Table};

fn print_set_stats(ip_addrs: &[u128]) {
Expand Down
3 changes: 1 addition & 2 deletions fastfield_codecs/src/null_index/dense.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
use std::convert::TryInto;
use std::io::{self, Write};

use common::BinarySerializable;
use common::{BinarySerializable, OwnedBytes};
use itertools::Itertools;
use ownedbytes::OwnedBytes;

use super::{get_bit_at, set_bit_at};

Expand Down
3 changes: 1 addition & 2 deletions fastfield_codecs/src/null_index/sparse.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
use std::io::{self, Write};

use common::BitSet;
use ownedbytes::OwnedBytes;
use common::{BitSet, OwnedBytes};

use super::{serialize_dense_codec, DenseCodec};

Expand Down
3 changes: 1 addition & 2 deletions fastfield_codecs/src/null_index_footer.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
use std::io::{self, Write};
use std::ops::Range;

use common::{BinarySerializable, CountingWriter, VInt};
use ownedbytes::OwnedBytes;
use common::{BinarySerializable, CountingWriter, OwnedBytes, VInt};

#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub(crate) enum FastFieldCardinality {
Expand Down
3 changes: 1 addition & 2 deletions fastfield_codecs/src/serialize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,8 @@ use std::io;
use std::num::NonZeroU64;
use std::sync::Arc;

use common::{BinarySerializable, VInt};
use common::{BinarySerializable, OwnedBytes, VInt};
use log::warn;
use ownedbytes::OwnedBytes;

use crate::bitpacked::BitpackedCodec;
use crate::blockwise_linear::BlockwiseLinearCodec;
Expand Down
2 changes: 1 addition & 1 deletion ownedbytes/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
authors = ["Paul Masurel <[email protected]>", "Pascal Seitz <[email protected]>"]
name = "ownedbytes"
version = "0.4.0"
version = "0.5.0"
edition = "2021"
description = "Expose data as static slice"
license = "MIT"
Expand Down
2 changes: 1 addition & 1 deletion ownedbytes/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use std::ops::{Deref, Range};
use std::sync::Arc;
use std::{fmt, io, mem};

use stable_deref_trait::StableDeref;
pub use stable_deref_trait::StableDeref;

/// An OwnedBytes simply wraps an object that owns a slice of data and exposes
/// this data as a slice.
Expand Down
2 changes: 1 addition & 1 deletion src/directory/mmap_directory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@ use std::path::{Path, PathBuf};
use std::sync::{Arc, RwLock, Weak};
use std::{fmt, result};

use common::StableDeref;
use fs2::FileExt;
use memmap2::Mmap;
use serde::{Deserialize, Serialize};
use stable_deref_trait::StableDeref;
use tempfile::TempDir;

use crate::core::META_FILEPATH;
Expand Down
6 changes: 2 additions & 4 deletions src/directory/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ mod mmap_directory;

mod directory;
mod directory_lock;
mod file_slice;
mod file_watcher;
mod footer;
mod managed_directory;
Expand All @@ -20,13 +19,12 @@ mod composite_file;
use std::io::BufWriter;
use std::path::PathBuf;

pub use common::{AntiCallToken, TerminatingWrite};
pub use ownedbytes::OwnedBytes;
pub use common::file_slice::{FileHandle, FileSlice};
pub use common::{AntiCallToken, OwnedBytes, TerminatingWrite};

pub(crate) use self::composite_file::{CompositeFile, CompositeWrite};
pub use self::directory::{Directory, DirectoryClone, DirectoryLock};
pub use self::directory_lock::{Lock, INDEX_WRITER_LOCK, META_LOCK};
pub use self::file_slice::{FileHandle, FileSlice};
pub use self::ram_directory::RamDirectory;
pub use self::watch_event_router::{WatchCallback, WatchCallbackList, WatchHandle};

Expand Down
3 changes: 1 addition & 2 deletions src/fastfield/alive_bitset.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
use std::io;
use std::io::Write;

use common::{intersect_bitsets, BitSet, ReadOnlyBitSet};
use ownedbytes::OwnedBytes;
use common::{intersect_bitsets, BitSet, OwnedBytes, ReadOnlyBitSet};

use crate::space_usage::ByteCount;
use crate::DocId;
Expand Down
Loading

0 comments on commit f39165e

Please sign in to comment.