Skip to content

Commit

Permalink
Minor refactoring (quickwit-oss#1266)
Browse files Browse the repository at this point in the history
  • Loading branch information
fulmicoton authored Jan 28, 2022
1 parent 9679c5f commit eca6628
Show file tree
Hide file tree
Showing 208 changed files with 2,015 additions and 2,331 deletions.
3 changes: 2 additions & 1 deletion bitpacker/src/bitpacker.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use std::{convert::TryInto, io};
use std::convert::TryInto;
use std::io;

pub struct BitPacker {
mini_buffer: u64,
Expand Down
5 changes: 2 additions & 3 deletions bitpacker/src/blocked_bitpacker.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
use super::bitpacker::BitPacker;
use super::compute_num_bits;
use crate::{minmax, BitUnpacker};

use super::{bitpacker::BitPacker, compute_num_bits};

const BLOCK_SIZE: usize = 128;

/// `BlockedBitpacker` compresses data in blocks of
/// 128 elements, while keeping an index on it
///
#[derive(Debug, Clone)]
pub struct BlockedBitpacker {
// bitpacked blocks
Expand Down
3 changes: 1 addition & 2 deletions bitpacker/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
mod bitpacker;
mod blocked_bitpacker;

pub use crate::bitpacker::BitPacker;
pub use crate::bitpacker::BitUnpacker;
pub use crate::bitpacker::{BitPacker, BitUnpacker};
pub use crate::blocked_bitpacker::BlockedBitpacker;

/// Computes the number of bits that will be used for bitpacking.
Expand Down
21 changes: 9 additions & 12 deletions common/src/bitset.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
use ownedbytes::OwnedBytes;
use std::convert::TryInto;
use std::io::Write;
use std::u64;
use std::{fmt, io};
use std::{fmt, io, u64};

use ownedbytes::OwnedBytes;

#[derive(Clone, Copy, Eq, PartialEq)]
pub struct TinySet(u64);
Expand Down Expand Up @@ -187,7 +187,6 @@ fn num_buckets(max_val: u32) -> u32 {

impl BitSet {
/// serialize a `BitSet`.
///
pub fn serialize<T: Write>(&self, writer: &mut T) -> io::Result<()> {
writer.write_all(self.max_value.to_le_bytes().as_ref())?;
for tinyset in self.tinysets.iter().cloned() {
Expand Down Expand Up @@ -353,7 +352,6 @@ impl ReadOnlyBitSet {
}

/// Iterate the tinyset on the fly from serialized data.
///
#[inline]
fn iter_tinysets(&self) -> impl Iterator<Item = TinySet> + '_ {
self.data.chunks_exact(8).map(move |chunk| {
Expand All @@ -363,7 +361,6 @@ impl ReadOnlyBitSet {
}

/// Iterate over the positions of the elements.
///
#[inline]
pub fn iter(&self) -> impl Iterator<Item = u32> + '_ {
self.iter_tinysets()
Expand Down Expand Up @@ -415,14 +412,14 @@ impl<'a> From<&'a BitSet> for ReadOnlyBitSet {
#[cfg(test)]
mod tests {

use super::BitSet;
use super::ReadOnlyBitSet;
use super::TinySet;
use std::collections::HashSet;

use ownedbytes::OwnedBytes;
use rand::distributions::Bernoulli;
use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};
use std::collections::HashSet;

use super::{BitSet, ReadOnlyBitSet, TinySet};

#[test]
fn test_read_serialized_bitset_full_multi() {
Expand Down Expand Up @@ -710,10 +707,10 @@ mod tests {
#[cfg(all(test, feature = "unstable"))]
mod bench {

use super::BitSet;
use super::TinySet;
use test;

use super::{BitSet, TinySet};

#[bench]
fn bench_tinyset_pop(b: &mut test::Bencher) {
b.iter(|| {
Expand Down
15 changes: 8 additions & 7 deletions common/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,11 +104,12 @@ pub fn u64_to_f64(val: u64) -> f64 {
#[cfg(test)]
pub mod test {

use super::{f64_to_u64, i64_to_u64, u64_to_f64, u64_to_i64};
use super::{BinarySerializable, FixedSize};
use proptest::prelude::*;
use std::f64;

use proptest::prelude::*;

use super::{f64_to_u64, i64_to_u64, u64_to_f64, u64_to_i64, BinarySerializable, FixedSize};

fn test_i64_converter_helper(val: i64) {
assert_eq!(u64_to_i64(i64_to_u64(val)), val);
}
Expand Down Expand Up @@ -157,10 +158,10 @@ pub mod test {
#[test]
fn test_f64_order() {
assert!(!(f64_to_u64(f64::NEG_INFINITY)..f64_to_u64(f64::INFINITY))
.contains(&f64_to_u64(f64::NAN))); //nan is not a number
assert!(f64_to_u64(1.5) > f64_to_u64(1.0)); //same exponent, different mantissa
assert!(f64_to_u64(2.0) > f64_to_u64(1.0)); //same mantissa, different exponent
assert!(f64_to_u64(2.0) > f64_to_u64(1.5)); //different exponent and mantissa
.contains(&f64_to_u64(f64::NAN))); // nan is not a number
assert!(f64_to_u64(1.5) > f64_to_u64(1.0)); // same exponent, different mantissa
assert!(f64_to_u64(2.0) > f64_to_u64(1.0)); // same mantissa, different exponent
assert!(f64_to_u64(2.0) > f64_to_u64(1.5)); // different exponent and mantissa
assert!(f64_to_u64(1.0) > f64_to_u64(-1.0)); // pos > neg
assert!(f64_to_u64(-1.5) < f64_to_u64(-1.0));
assert!(f64_to_u64(-2.0) < f64_to_u64(1.0));
Expand Down
14 changes: 6 additions & 8 deletions common/src/serialize.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
use crate::Endianness;
use crate::VInt;
use std::io::{Read, Write};
use std::{fmt, io};

use byteorder::{ReadBytesExt, WriteBytesExt};
use std::fmt;
use std::io;
use std::io::Read;
use std::io::Write;

use crate::{Endianness, VInt};

/// Trait for a simple binary serialization.
pub trait BinarySerializable: fmt::Debug + Sized {
Expand Down Expand Up @@ -202,8 +201,7 @@ impl BinarySerializable for String {
#[cfg(test)]
pub mod test {

use super::VInt;
use super::*;
use super::{VInt, *};
use crate::serialize::BinarySerializable;
pub fn fixed_size_test<O: BinarySerializable + FixedSize + Default>() {
let mut buffer = Vec::new();
Expand Down
13 changes: 6 additions & 7 deletions common/src/vint.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
use super::BinarySerializable;
use byteorder::{ByteOrder, LittleEndian};
use std::io;
use std::io::Read;
use std::io::Write;
use std::io::{Read, Write};

use byteorder::{ByteOrder, LittleEndian};

use super::BinarySerializable;

/// Wrapper over a `u64` that serializes as a variable int.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
Expand Down Expand Up @@ -174,9 +175,7 @@ impl BinarySerializable for VInt {
#[cfg(test)]
mod tests {

use super::serialize_vint_u32;
use super::BinarySerializable;
use super::VInt;
use super::{serialize_vint_u32, BinarySerializable, VInt};

fn aux_test_vint(val: u64) {
let mut v = [14u8; 10];
Expand Down
10 changes: 5 additions & 5 deletions common/src/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,8 @@ impl<W: TerminatingWrite> TerminatingWrite for CountingWriter<W> {
}
}

/// Struct used to prevent from calling [`terminate_ref`](trait.TerminatingWrite.html#tymethod.terminate_ref) directly
/// Struct used to prevent callers from invoking
/// [`terminate_ref`](trait.TerminatingWrite.html#tymethod.terminate_ref) directly
///
/// The point is that while the type is public, it cannot be built by anyone
/// outside of this module.
Expand All @@ -64,9 +65,7 @@ pub struct AntiCallToken(());
pub trait TerminatingWrite: Write {
/// Indicate that the writer will no longer be used. Internally call terminate_ref.
fn terminate(mut self) -> io::Result<()>
where
Self: Sized,
{
where Self: Sized {
self.terminate_ref(AntiCallToken(()))
}

Expand Down Expand Up @@ -97,9 +96,10 @@ impl<'a> TerminatingWrite for &'a mut Vec<u8> {
#[cfg(test)]
mod test {

use super::CountingWriter;
use std::io::Write;

use super::CountingWriter;

#[test]
fn test_counting_writer() {
let buffer: Vec<u8> = vec![];
Expand Down
4 changes: 2 additions & 2 deletions examples/basic_search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,8 @@ fn main() -> tantivy::Result<()> {
old_man_doc.add_text(title, "The Old Man and the Sea");
old_man_doc.add_text(
body,
"He was an old man who fished alone in a skiff in the Gulf Stream and \
he had gone eighty-four days now without taking a fish.",
"He was an old man who fished alone in a skiff in the Gulf Stream and he had gone \
eighty-four days now without taking a fish.",
);

// ... and add it to the `IndexWriter`.
Expand Down
3 changes: 1 addition & 2 deletions examples/custom_collector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,7 @@
use tantivy::collector::{Collector, SegmentCollector};
use tantivy::fastfield::{DynamicFastFieldReader, FastFieldReader};
use tantivy::query::QueryParser;
use tantivy::schema::Field;
use tantivy::schema::{Schema, FAST, INDEXED, TEXT};
use tantivy::schema::{Field, Schema, FAST, INDEXED, TEXT};
use tantivy::{doc, Index, Score, SegmentReader};

#[derive(Default)]
Expand Down
5 changes: 3 additions & 2 deletions examples/deleting_updating_documents.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,9 @@ fn main() -> tantivy::Result<()> {
// If it is `text`, let's make sure to keep it `raw` and let's avoid
// running any text processing on it.
// This is done by associating this field to the tokenizer named `raw`.
// Rather than building our [`TextOptions`](//docs.rs/tantivy/~0/tantivy/schema/struct.TextOptions.html) manually,
// We use the `STRING` shortcut. `STRING` stands for indexed (without term frequency or positions)
// Rather than building our
// [`TextOptions`](//docs.rs/tantivy/~0/tantivy/schema/struct.TextOptions.html) manually, we
// use the `STRING` shortcut. `STRING` stands for indexed (without term frequency or positions)
// and untokenized.
//
// Because we also want to be able to see this `id` in our returned documents,
Expand Down
6 changes: 3 additions & 3 deletions examples/faceted_search_with_tweaked_score.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
use std::collections::HashSet;

use tantivy::collector::TopDocs;
use tantivy::doc;
use tantivy::query::BooleanQuery;
use tantivy::schema::*;
use tantivy::{DocId, Index, Score, SegmentReader};
use tantivy::{doc, DocId, Index, Score, SegmentReader};

fn main() -> tantivy::Result<()> {
let mut schema_builder = Schema::builder();
Expand Down Expand Up @@ -87,7 +87,7 @@ fn main() -> tantivy::Result<()> {
.unwrap()
.get_first(title)
.unwrap()
.text()
.as_text()
.unwrap()
.to_owned()
})
Expand Down
16 changes: 8 additions & 8 deletions examples/iterating_docs_and_positions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,11 @@ fn main() -> tantivy::Result<()> {
let term_the = Term::from_field_text(title, "the");

// This segment posting object is like a cursor over the documents matching the term.
// The `IndexRecordOption` arguments tells tantivy we will be interested in both term frequencies
// and positions.
// The `IndexRecordOption` argument tells tantivy we will be interested in both term
// frequencies and positions.
//
// If you don't need all this information, you may get better performance by decompressing less
// information.
// If you don't need all this information, you may get better performance by decompressing
// less information.
if let Some(mut segment_postings) =
inverted_index.read_postings(&term_the, IndexRecordOption::WithFreqsAndPositions)?
{
Expand Down Expand Up @@ -109,11 +109,11 @@ fn main() -> tantivy::Result<()> {
let inverted_index = segment_reader.inverted_index(title)?;

// This segment posting object is like a cursor over the documents matching the term.
// The `IndexRecordOption` arguments tells tantivy we will be interested in both term frequencies
// and positions.
// The `IndexRecordOption` argument tells tantivy we will be interested in both term
// frequencies and positions.
//
// If you don't need all this information, you may get better performance by decompressing less
// information.
// If you don't need all this information, you may get better performance by decompressing
// less information.
if let Some(mut block_segment_postings) =
inverted_index.read_block_postings(&term_the, IndexRecordOption::Basic)?
{
Expand Down
4 changes: 3 additions & 1 deletion examples/multiple_producer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
use std::sync::{Arc, RwLock};
use std::thread;
use std::time::Duration;

use tantivy::schema::{Schema, STORED, TEXT};
use tantivy::{doc, Index, IndexWriter, Opstamp, TantivyError};

Expand Down Expand Up @@ -90,7 +91,8 @@ fn main() -> tantivy::Result<()> {
// # In the main thread, we commit 10 times, once every 500ms.
for _ in 0..10 {
let opstamp: Opstamp = {
// Committing or rollbacking on the other hand requires write lock. This will block other threads.
// Committing or rolling back, on the other hand, requires a write lock. This will block
// other threads.
let mut index_writer_wlock = index_writer.write().unwrap();
index_writer_wlock.commit()?
};
Expand Down
5 changes: 4 additions & 1 deletion examples/snippet.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,10 @@ fn main() -> tantivy::Result<()> {
let doc = searcher.doc(doc_address)?;
let snippet = snippet_generator.snippet_from_doc(&doc);
println!("Document score {}:", score);
println!("title: {}", doc.get_first(title).unwrap().text().unwrap());
println!(
"title: {}",
doc.get_first(title).unwrap().as_text().unwrap()
);
println!("snippet: {}", snippet.to_html());
println!("custom highlighting: {}", highlight(snippet));
}
Expand Down
7 changes: 4 additions & 3 deletions examples/warmer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@ use tantivy::collector::TopDocs;
use tantivy::fastfield::FastFieldReader;
use tantivy::query::QueryParser;
use tantivy::schema::{Field, Schema, FAST, TEXT};
use tantivy::{doc, DocAddress, DocId, Index, IndexReader, SegmentReader};
use tantivy::{Opstamp, Searcher, SearcherGeneration, SegmentId, Warmer};
use tantivy::{
doc, DocAddress, DocId, Index, IndexReader, Opstamp, Searcher, SearcherGeneration, SegmentId,
SegmentReader, Warmer,
};

// This example shows how warmers can be used to
// load values from an external source using the Warmer API.
Expand Down Expand Up @@ -90,7 +92,6 @@ impl Warmer for DynamicPriceColumn {
/// This map represents a map (ProductId -> Price)
///
/// In practice, it could be fetching things from an external service, like a SQL table.
///
#[derive(Default, Clone)]
pub struct ExternalPriceTable {
prices: Arc<RwLock<HashMap<ProductId, Price>>>,
Expand Down
14 changes: 7 additions & 7 deletions fastfield_codecs/benches/bench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,14 @@ extern crate test;

#[cfg(test)]
mod tests {
use fastfield_codecs::{
bitpacked::{BitpackedFastFieldReader, BitpackedFastFieldSerializer},
linearinterpol::{LinearInterpolFastFieldReader, LinearInterpolFastFieldSerializer},
multilinearinterpol::{
MultiLinearInterpolFastFieldReader, MultiLinearInterpolFastFieldSerializer,
},
*,
use fastfield_codecs::bitpacked::{BitpackedFastFieldReader, BitpackedFastFieldSerializer};
use fastfield_codecs::linearinterpol::{
LinearInterpolFastFieldReader, LinearInterpolFastFieldSerializer,
};
use fastfield_codecs::multilinearinterpol::{
MultiLinearInterpolFastFieldReader, MultiLinearInterpolFastFieldSerializer,
};
use fastfield_codecs::*;

fn get_data() -> Vec<u64> {
let mut data: Vec<_> = (100..55000_u64)
Expand Down
12 changes: 4 additions & 8 deletions fastfield_codecs/src/bitpacked.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,9 @@
use crate::FastFieldCodecReader;
use crate::FastFieldCodecSerializer;
use crate::FastFieldDataAccess;
use crate::FastFieldStats;
use common::BinarySerializable;
use std::io::{self, Write};
use tantivy_bitpacker::compute_num_bits;
use tantivy_bitpacker::BitPacker;

use tantivy_bitpacker::BitUnpacker;
use common::BinarySerializable;
use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker};

use crate::{FastFieldCodecReader, FastFieldCodecSerializer, FastFieldDataAccess, FastFieldStats};

/// Depending on the field type, a different
/// fast field is required.
Expand Down
Loading

0 comments on commit eca6628

Please sign in to comment.