Minor refactoring (quickwit-oss#1266)

arafat877 · Jan 28, 2022 · commit eca6628
1 parent 9679c5f
Show file tree

Hide file tree

Showing 208 changed files with 2,015 additions and 2,331 deletions.
diff --git a/bitpacker/src/bitpacker.rs b/bitpacker/src/bitpacker.rs
@@ -1,4 +1,5 @@
-use std::{convert::TryInto, io};
+use std::convert::TryInto;
+use std::io;
 
 pub struct BitPacker {
     mini_buffer: u64,

diff --git a/bitpacker/src/blocked_bitpacker.rs b/bitpacker/src/blocked_bitpacker.rs
@@ -1,12 +1,11 @@
+use super::bitpacker::BitPacker;
+use super::compute_num_bits;
 use crate::{minmax, BitUnpacker};
 
-use super::{bitpacker::BitPacker, compute_num_bits};
-
 const BLOCK_SIZE: usize = 128;
 
 /// `BlockedBitpacker` compresses data in blocks of
 /// 128 elements, while keeping an index on it
-///
 #[derive(Debug, Clone)]
 pub struct BlockedBitpacker {
     // bitpacked blocks

diff --git a/bitpacker/src/lib.rs b/bitpacker/src/lib.rs
@@ -1,8 +1,7 @@
 mod bitpacker;
 mod blocked_bitpacker;
 
-pub use crate::bitpacker::BitPacker;
-pub use crate::bitpacker::BitUnpacker;
+pub use crate::bitpacker::{BitPacker, BitUnpacker};
 pub use crate::blocked_bitpacker::BlockedBitpacker;
 
 /// Computes the number of bits that will be used for bitpacking.

diff --git a/common/src/bitset.rs b/common/src/bitset.rs
@@ -1,8 +1,8 @@
-use ownedbytes::OwnedBytes;
 use std::convert::TryInto;
 use std::io::Write;
-use std::u64;
-use std::{fmt, io};
+use std::{fmt, io, u64};
+
+use ownedbytes::OwnedBytes;
 
 #[derive(Clone, Copy, Eq, PartialEq)]
 pub struct TinySet(u64);
@@ -187,7 +187,6 @@ fn num_buckets(max_val: u32) -> u32 {
 
 impl BitSet {
     /// serialize a `BitSet`.
-    ///
     pub fn serialize<T: Write>(&self, writer: &mut T) -> io::Result<()> {
         writer.write_all(self.max_value.to_le_bytes().as_ref())?;
         for tinyset in self.tinysets.iter().cloned() {
@@ -353,7 +352,6 @@ impl ReadOnlyBitSet {
     }
 
     /// Iterate the tinyset on the fly from serialized data.
-    ///
     #[inline]
     fn iter_tinysets(&self) -> impl Iterator<Item = TinySet> + '_ {
         self.data.chunks_exact(8).map(move |chunk| {
@@ -363,7 +361,6 @@ impl ReadOnlyBitSet {
     }
 
     /// Iterate over the positions of the elements.
-    ///
     #[inline]
     pub fn iter(&self) -> impl Iterator<Item = u32> + '_ {
         self.iter_tinysets()
@@ -415,14 +412,14 @@ impl<'a> From<&'a BitSet> for ReadOnlyBitSet {
 #[cfg(test)]
 mod tests {
 
-    use super::BitSet;
-    use super::ReadOnlyBitSet;
-    use super::TinySet;
+    use std::collections::HashSet;
+
     use ownedbytes::OwnedBytes;
     use rand::distributions::Bernoulli;
     use rand::rngs::StdRng;
     use rand::{Rng, SeedableRng};
-    use std::collections::HashSet;
+
+    use super::{BitSet, ReadOnlyBitSet, TinySet};
 
     #[test]
     fn test_read_serialized_bitset_full_multi() {
@@ -710,10 +707,10 @@ mod tests {
 #[cfg(all(test, feature = "unstable"))]
 mod bench {
 
-    use super::BitSet;
-    use super::TinySet;
     use test;
 
+    use super::{BitSet, TinySet};
+
     #[bench]
     fn bench_tinyset_pop(b: &mut test::Bencher) {
         b.iter(|| {

diff --git a/common/src/lib.rs b/common/src/lib.rs
@@ -104,11 +104,12 @@ pub fn u64_to_f64(val: u64) -> f64 {
 #[cfg(test)]
 pub mod test {
 
-    use super::{f64_to_u64, i64_to_u64, u64_to_f64, u64_to_i64};
-    use super::{BinarySerializable, FixedSize};
-    use proptest::prelude::*;
     use std::f64;
 
+    use proptest::prelude::*;
+
+    use super::{f64_to_u64, i64_to_u64, u64_to_f64, u64_to_i64, BinarySerializable, FixedSize};
+
     fn test_i64_converter_helper(val: i64) {
         assert_eq!(u64_to_i64(i64_to_u64(val)), val);
     }
@@ -157,10 +158,10 @@ pub mod test {
     #[test]
     fn test_f64_order() {
         assert!(!(f64_to_u64(f64::NEG_INFINITY)..f64_to_u64(f64::INFINITY))
-            .contains(&f64_to_u64(f64::NAN))); //nan is not a number
-        assert!(f64_to_u64(1.5) > f64_to_u64(1.0)); //same exponent, different mantissa
-        assert!(f64_to_u64(2.0) > f64_to_u64(1.0)); //same mantissa, different exponent
-        assert!(f64_to_u64(2.0) > f64_to_u64(1.5)); //different exponent and mantissa
+            .contains(&f64_to_u64(f64::NAN))); // nan is not a number
+        assert!(f64_to_u64(1.5) > f64_to_u64(1.0)); // same exponent, different mantissa
+        assert!(f64_to_u64(2.0) > f64_to_u64(1.0)); // same mantissa, different exponent
+        assert!(f64_to_u64(2.0) > f64_to_u64(1.5)); // different exponent and mantissa
         assert!(f64_to_u64(1.0) > f64_to_u64(-1.0)); // pos > neg
         assert!(f64_to_u64(-1.5) < f64_to_u64(-1.0));
         assert!(f64_to_u64(-2.0) < f64_to_u64(1.0));

diff --git a/common/src/serialize.rs b/common/src/serialize.rs
@@ -1,10 +1,9 @@
-use crate::Endianness;
-use crate::VInt;
+use std::io::{Read, Write};
+use std::{fmt, io};
+
 use byteorder::{ReadBytesExt, WriteBytesExt};
-use std::fmt;
-use std::io;
-use std::io::Read;
-use std::io::Write;
+
+use crate::{Endianness, VInt};
 
 /// Trait for a simple binary serialization.
 pub trait BinarySerializable: fmt::Debug + Sized {
@@ -202,8 +201,7 @@ impl BinarySerializable for String {
 #[cfg(test)]
 pub mod test {
 
-    use super::VInt;
-    use super::*;
+    use super::{VInt, *};
     use crate::serialize::BinarySerializable;
     pub fn fixed_size_test<O: BinarySerializable + FixedSize + Default>() {
         let mut buffer = Vec::new();

diff --git a/common/src/vint.rs b/common/src/vint.rs
@@ -1,8 +1,9 @@
-use super::BinarySerializable;
-use byteorder::{ByteOrder, LittleEndian};
 use std::io;
-use std::io::Read;
-use std::io::Write;
+use std::io::{Read, Write};
+
+use byteorder::{ByteOrder, LittleEndian};
+
+use super::BinarySerializable;
 
 ///   Wrapper over a `u64` that serializes as a variable int.
 #[derive(Clone, Copy, Debug, Eq, PartialEq)]
@@ -174,9 +175,7 @@ impl BinarySerializable for VInt {
 #[cfg(test)]
 mod tests {
 
-    use super::serialize_vint_u32;
-    use super::BinarySerializable;
-    use super::VInt;
+    use super::{serialize_vint_u32, BinarySerializable, VInt};
 
     fn aux_test_vint(val: u64) {
         let mut v = [14u8; 10];

diff --git a/common/src/writer.rs b/common/src/writer.rs
@@ -54,7 +54,8 @@ impl<W: TerminatingWrite> TerminatingWrite for CountingWriter<W> {
     }
 }
 
-/// Struct used to prevent from calling [`terminate_ref`](trait.TerminatingWrite.html#tymethod.terminate_ref) directly
+/// Struct used to prevent from calling
+/// [`terminate_ref`](trait.TerminatingWrite.html#tymethod.terminate_ref) directly
 ///
 /// The point is that while the type is public, it cannot be built by anyone
 /// outside of this module.
@@ -64,9 +65,7 @@ pub struct AntiCallToken(());
 pub trait TerminatingWrite: Write {
     /// Indicate that the writer will no longer be used. Internally call terminate_ref.
     fn terminate(mut self) -> io::Result<()>
-    where
-        Self: Sized,
-    {
+    where Self: Sized {
         self.terminate_ref(AntiCallToken(()))
     }
 
@@ -97,9 +96,10 @@ impl<'a> TerminatingWrite for &'a mut Vec<u8> {
 #[cfg(test)]
 mod test {
 
-    use super::CountingWriter;
     use std::io::Write;
 
+    use super::CountingWriter;
+
     #[test]
     fn test_counting_writer() {
         let buffer: Vec<u8> = vec![];

diff --git a/examples/basic_search.rs b/examples/basic_search.rs
@@ -91,8 +91,8 @@ fn main() -> tantivy::Result<()> {
     old_man_doc.add_text(title, "The Old Man and the Sea");
     old_man_doc.add_text(
         body,
-        "He was an old man who fished alone in a skiff in the Gulf Stream and \
-         he had gone eighty-four days now without taking a fish.",
+        "He was an old man who fished alone in a skiff in the Gulf Stream and he had gone \
+         eighty-four days now without taking a fish.",
     );
 
     // ... and add it to the `IndexWriter`.

diff --git a/examples/custom_collector.rs b/examples/custom_collector.rs
@@ -12,8 +12,7 @@
 use tantivy::collector::{Collector, SegmentCollector};
 use tantivy::fastfield::{DynamicFastFieldReader, FastFieldReader};
 use tantivy::query::QueryParser;
-use tantivy::schema::Field;
-use tantivy::schema::{Schema, FAST, INDEXED, TEXT};
+use tantivy::schema::{Field, Schema, FAST, INDEXED, TEXT};
 use tantivy::{doc, Index, Score, SegmentReader};
 
 #[derive(Default)]

diff --git a/examples/deleting_updating_documents.rs b/examples/deleting_updating_documents.rs
@@ -56,8 +56,9 @@ fn main() -> tantivy::Result<()> {
     // If it is `text`, let's make sure to keep it `raw` and let's avoid
     // running any text processing on it.
     // This is done by associating this field to the tokenizer named `raw`.
-    // Rather than building our [`TextOptions`](//docs.rs/tantivy/~0/tantivy/schema/struct.TextOptions.html) manually,
-    // We use the `STRING` shortcut. `STRING` stands for indexed (without term frequency or positions)
+    // Rather than building our
+    // [`TextOptions`](//docs.rs/tantivy/~0/tantivy/schema/struct.TextOptions.html) manually, We
+    // use the `STRING` shortcut. `STRING` stands for indexed (without term frequency or positions)
     // and untokenized.
     //
     // Because we also want to be able to see this `id` in our returned documents,

diff --git a/examples/faceted_search_with_tweaked_score.rs b/examples/faceted_search_with_tweaked_score.rs
@@ -1,9 +1,9 @@
 use std::collections::HashSet;
+
 use tantivy::collector::TopDocs;
-use tantivy::doc;
 use tantivy::query::BooleanQuery;
 use tantivy::schema::*;
-use tantivy::{DocId, Index, Score, SegmentReader};
+use tantivy::{doc, DocId, Index, Score, SegmentReader};
 
 fn main() -> tantivy::Result<()> {
     let mut schema_builder = Schema::builder();
@@ -87,7 +87,7 @@ fn main() -> tantivy::Result<()> {
                     .unwrap()
                     .get_first(title)
                     .unwrap()
-                    .text()
+                    .as_text()
                     .unwrap()
                     .to_owned()
             })

diff --git a/examples/iterating_docs_and_positions.rs b/examples/iterating_docs_and_positions.rs
@@ -52,11 +52,11 @@ fn main() -> tantivy::Result<()> {
         let term_the = Term::from_field_text(title, "the");
 
         // This segment posting object is like a cursor over the documents matching the term.
-        // The `IndexRecordOption` arguments tells tantivy we will be interested in both term frequencies
-        // and positions.
+        // The `IndexRecordOption` arguments tells tantivy we will be interested in both term
+        // frequencies and positions.
         //
-        // If you don't need all this information, you may get better performance by decompressing less
-        // information.
+        // If you don't need all this information, you may get better performance by decompressing
+        // less information.
         if let Some(mut segment_postings) =
             inverted_index.read_postings(&term_the, IndexRecordOption::WithFreqsAndPositions)?
         {
@@ -109,11 +109,11 @@ fn main() -> tantivy::Result<()> {
         let inverted_index = segment_reader.inverted_index(title)?;
 
         // This segment posting object is like a cursor over the documents matching the term.
-        // The `IndexRecordOption` arguments tells tantivy we will be interested in both term frequencies
-        // and positions.
+        // The `IndexRecordOption` arguments tells tantivy we will be interested in both term
+        // frequencies and positions.
         //
-        // If you don't need all this information, you may get better performance by decompressing less
-        // information.
+        // If you don't need all this information, you may get better performance by decompressing
+        // less information.
         if let Some(mut block_segment_postings) =
             inverted_index.read_block_postings(&term_the, IndexRecordOption::Basic)?
         {

diff --git a/examples/multiple_producer.rs b/examples/multiple_producer.rs
@@ -28,6 +28,7 @@
 use std::sync::{Arc, RwLock};
 use std::thread;
 use std::time::Duration;
+
 use tantivy::schema::{Schema, STORED, TEXT};
 use tantivy::{doc, Index, IndexWriter, Opstamp, TantivyError};
 
@@ -90,7 +91,8 @@ fn main() -> tantivy::Result<()> {
     // # In the main thread, we commit 10 times, once every 500ms.
     for _ in 0..10 {
         let opstamp: Opstamp = {
-            // Committing or rollbacking on the other hand requires write lock. This will block other threads.
+            // Committing or rollbacking on the other hand requires write lock. This will block
+            // other threads.
             let mut index_writer_wlock = index_writer.write().unwrap();
             index_writer_wlock.commit()?
         };

diff --git a/examples/snippet.rs b/examples/snippet.rs
@@ -57,7 +57,10 @@ fn main() -> tantivy::Result<()> {
         let doc = searcher.doc(doc_address)?;
         let snippet = snippet_generator.snippet_from_doc(&doc);
         println!("Document score {}:", score);
-        println!("title: {}", doc.get_first(title).unwrap().text().unwrap());
+        println!(
+            "title: {}",
+            doc.get_first(title).unwrap().as_text().unwrap()
+        );
         println!("snippet: {}", snippet.to_html());
         println!("custom highlighting: {}", highlight(snippet));
     }

diff --git a/examples/warmer.rs b/examples/warmer.rs
@@ -6,8 +6,10 @@ use tantivy::collector::TopDocs;
 use tantivy::fastfield::FastFieldReader;
 use tantivy::query::QueryParser;
 use tantivy::schema::{Field, Schema, FAST, TEXT};
-use tantivy::{doc, DocAddress, DocId, Index, IndexReader, SegmentReader};
-use tantivy::{Opstamp, Searcher, SearcherGeneration, SegmentId, Warmer};
+use tantivy::{
+    doc, DocAddress, DocId, Index, IndexReader, Opstamp, Searcher, SearcherGeneration, SegmentId,
+    SegmentReader, Warmer,
+};
 
 // This example shows how warmers can be used to
 // load a values from an external sources using the Warmer API.
@@ -90,7 +92,6 @@ impl Warmer for DynamicPriceColumn {
 /// This map represents a map (ProductId -> Price)
 ///
 /// In practise, it could be fetching things from an external service, like a SQL table.
-///
 #[derive(Default, Clone)]
 pub struct ExternalPriceTable {
     prices: Arc<RwLock<HashMap<ProductId, Price>>>,

diff --git a/fastfield_codecs/benches/bench.rs b/fastfield_codecs/benches/bench.rs
@@ -4,14 +4,14 @@ extern crate test;
 
 #[cfg(test)]
 mod tests {
-    use fastfield_codecs::{
-        bitpacked::{BitpackedFastFieldReader, BitpackedFastFieldSerializer},
-        linearinterpol::{LinearInterpolFastFieldReader, LinearInterpolFastFieldSerializer},
-        multilinearinterpol::{
-            MultiLinearInterpolFastFieldReader, MultiLinearInterpolFastFieldSerializer,
-        },
-        *,
+    use fastfield_codecs::bitpacked::{BitpackedFastFieldReader, BitpackedFastFieldSerializer};
+    use fastfield_codecs::linearinterpol::{
+        LinearInterpolFastFieldReader, LinearInterpolFastFieldSerializer,
     };
+    use fastfield_codecs::multilinearinterpol::{
+        MultiLinearInterpolFastFieldReader, MultiLinearInterpolFastFieldSerializer,
+    };
+    use fastfield_codecs::*;
 
     fn get_data() -> Vec<u64> {
         let mut data: Vec<_> = (100..55000_u64)

diff --git a/fastfield_codecs/src/bitpacked.rs b/fastfield_codecs/src/bitpacked.rs
@@ -1,13 +1,9 @@
-use crate::FastFieldCodecReader;
-use crate::FastFieldCodecSerializer;
-use crate::FastFieldDataAccess;
-use crate::FastFieldStats;
-use common::BinarySerializable;
 use std::io::{self, Write};
-use tantivy_bitpacker::compute_num_bits;
-use tantivy_bitpacker::BitPacker;
 
-use tantivy_bitpacker::BitUnpacker;
+use common::BinarySerializable;
+use tantivy_bitpacker::{compute_num_bits, BitPacker, BitUnpacker};
+
+use crate::{FastFieldCodecReader, FastFieldCodecSerializer, FastFieldDataAccess, FastFieldStats};
 
 /// Depending on the field type, a different
 /// fast field is required.