From 6a88ac3fe3bcd29072b4a42588c7d30a8386fc88 Mon Sep 17 00:00:00 2001
From: Bruce Mitchener
Date: Sun, 18 Sep 2022 14:11:12 +0700
Subject: [PATCH] Documentation improvements.

Fix some linking, some grammar, some typos, etc.
---
 ARCHITECTURE.md                        |  2 +-
 common/src/lib.rs                      |  8 ++--
 common/src/writer.rs                   |  2 +-
 examples/custom_tokenizer.rs           |  3 +-
 query-grammar/src/query_grammar.rs     |  4 +-
 src/collector/facet_collector.rs       | 10 ++---
 src/collector/mod.rs                   |  6 +--
 src/core/index.rs                      | 56 +++++++++++++++-----------
 src/core/searcher.rs                   |  2 +-
 src/directory/directory.rs             | 41 ++++++++++---------
 src/directory/directory_lock.rs        |  7 ++--
 src/directory/mmap_directory.rs        |  6 +--
 src/directory/ram_directory.rs         | 12 +++---
 src/fastfield/bytes/writer.rs          |  8 ++--
 src/indexer/index_writer.rs            | 16 ++++----
 src/positions/mod.rs                   |  2 +-
 src/postings/block_search.rs           |  2 +-
 src/query/phrase_query/phrase_query.rs | 10 ++---
 src/query/weight.rs                    |  6 ++-
 src/reader/mod.rs                      | 20 ++++-----
 src/store/mod.rs                       |  3 +-
 src/tokenizer/mod.rs                   |  2 +-
 22 files changed, 121 insertions(+), 107 deletions(-)

diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md
index f48ce4ad1d..c3dde5e8dc 100644
--- a/ARCHITECTURE.md
+++ b/ARCHITECTURE.md
@@ -95,7 +95,7 @@ called [`Directory`](src/directory/directory.rs).
 Contrary to Lucene however, "files" are quite different from some kind of
 `io::Read` object. Check out [`src/directory/directory.rs`](src/directory/directory.rs)
 trait for more details.
-Tantivy ships two main directory implementation: the `MMapDirectory` and the `RAMDirectory`,
+Tantivy ships two main directory implementations: the `MmapDirectory` and the `RamDirectory`,
 but users can extend tantivy with their own implementation.
 
 ## [schema/](src/schema): What are documents?
diff --git a/common/src/lib.rs b/common/src/lib.rs
index 4463c46e1f..9dac16de1b 100644
--- a/common/src/lib.rs
+++ b/common/src/lib.rs
@@ -55,13 +55,13 @@ const HIGHEST_BIT: u64 = 1 << 63;
 /// to values over 2^63, and all values end up requiring 64 bits.
 ///
 /// # See also
-/// The [reverse mapping is `u64_to_i64`](./fn.u64_to_i64.html).
+/// The reverse mapping is [`u64_to_i64()`].
 #[inline]
 pub fn i64_to_u64(val: i64) -> u64 {
     (val as u64) ^ HIGHEST_BIT
 }
 
-/// Reverse the mapping given by [`i64_to_u64`](./fn.i64_to_u64.html).
+/// Reverse the mapping given by [`i64_to_u64()`].
 #[inline]
 pub fn u64_to_i64(val: u64) -> i64 {
     (val ^ HIGHEST_BIT) as i64
@@ -83,7 +83,7 @@ pub fn u64_to_i64(val: u64) -> i64 {
 /// explains the mapping in a clear manner.
 ///
 /// # See also
-/// The [reverse mapping is `u64_to_f64`](./fn.u64_to_f64.html).
+/// The reverse mapping is [`u64_to_f64()`].
 #[inline]
 pub fn f64_to_u64(val: f64) -> u64 {
     let bits = val.to_bits();
@@ -94,7 +94,7 @@ pub fn f64_to_u64(val: f64) -> u64 {
     }
 }
 
-/// Reverse the mapping given by [`i64_to_u64`](./fn.i64_to_u64.html).
+/// Reverse the mapping given by [`f64_to_u64()`].
#[inline]
 pub fn u64_to_f64(val: u64) -> f64 {
     f64::from_bits(if val & HIGHEST_BIT != 0 {
diff --git a/common/src/writer.rs b/common/src/writer.rs
index c0f4f297ed..88457687f4 100644
--- a/common/src/writer.rs
+++ b/common/src/writer.rs
@@ -55,7 +55,7 @@ impl<W: TerminatingWrite> TerminatingWrite for CountingWriter<W> {
 }
 
 /// Struct used to prevent from calling
-/// [`terminate_ref`](trait.TerminatingWrite.html#tymethod.terminate_ref) directly
+/// [`terminate_ref`](TerminatingWrite::terminate_ref) directly
 ///
 /// The point is that while the type is public, it cannot be built by anyone
 /// outside of this module.
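A note for readers of the `common/src/lib.rs` hunks above: the order-preserving mapping they document is easy to verify in isolation. The following self-contained sketch copies `HIGHEST_BIT`, `i64_to_u64`, and `u64_to_i64` from the hunks; the `main` function and its assertions are illustrative additions, not part of the patch.

```rust
const HIGHEST_BIT: u64 = 1 << 63;

/// Maps `i64` to `u64` by flipping the sign bit, preserving order.
fn i64_to_u64(val: i64) -> u64 {
    (val as u64) ^ HIGHEST_BIT
}

/// Reverse mapping: flipping the same bit restores the original value.
fn u64_to_i64(val: u64) -> i64 {
    (val ^ HIGHEST_BIT) as i64
}

fn main() {
    // Round-trip: the two functions are inverses of each other.
    for v in [i64::MIN, -1, 0, 1, i64::MAX] {
        assert_eq!(u64_to_i64(i64_to_u64(v)), v);
    }
    // Order preservation: i64 ordering maps to u64 ordering,
    // which is what keeps range queries on the mapped values correct.
    assert!(i64_to_u64(-5) < i64_to_u64(3));
    assert_eq!(i64_to_u64(i64::MIN), u64::MIN);
    assert_eq!(i64_to_u64(i64::MAX), u64::MAX);
}
```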
diff --git a/examples/custom_tokenizer.rs b/examples/custom_tokenizer.rs
index 5c37216494..4a8a4b754d 100644
--- a/examples/custom_tokenizer.rs
+++ b/examples/custom_tokenizer.rs
@@ -36,8 +36,7 @@ fn main() -> tantivy::Result<()> {
-    // need to be able to be able to retrieve it
+    // need to be able to retrieve it
     // for our application.
     //
-    // We can make our index lighter and
-    // by omitting `STORED` flag.
+    // We can make our index lighter by omitting the `STORED` flag.
     let body = schema_builder.add_text_field("body", TEXT);
 
     let schema = schema_builder.build();
diff --git a/query-grammar/src/query_grammar.rs b/query-grammar/src/query_grammar.rs
index 685fa35ed9..6fabff8cea 100644
--- a/query-grammar/src/query_grammar.rs
+++ b/query-grammar/src/query_grammar.rs
@@ -23,7 +23,7 @@ const ESCAPED_SPECIAL_CHARS_PATTERN: &str = r#"\\(\+|\^|`|:|\{|\}|"|\[|\]|\(|\)|
 /// Parses a field_name
 /// A field name must have at least one character and be followed by a colon.
 /// All characters are allowed including special characters `SPECIAL_CHARS`, but these
-/// need to be escaped with a backslack character '\'.
+/// need to be escaped with a backslash character '\'.
 fn field_name<'a>() -> impl Parser<&'a str, Output = String> {
     static ESCAPED_SPECIAL_CHARS_RE: Lazy<Regex> =
         Lazy::new(|| Regex::new(ESCAPED_SPECIAL_CHARS_PATTERN).unwrap());
@@ -68,7 +68,7 @@ fn word<'a>() -> impl Parser<&'a str, Output = String> {
 ///
 /// NOTE: also accepts 999999-99-99T99:99:99.266051969+99:99
 /// We delegate rejecting such invalid dates to the logical AST computation code
-/// which invokes time::OffsetDateTime::parse(..., &Rfc3339) on the value to actually parse
+/// which invokes `time::OffsetDateTime::parse(..., &Rfc3339)` on the value to actually parse
 /// it (instead of merely extracting the datetime value as string as done here).
 fn date_time<'a>() -> impl Parser<&'a str, Output = String> {
     let two_digits = || recognize::<String, _, _>((digit(), digit()));
diff --git a/src/collector/facet_collector.rs b/src/collector/facet_collector.rs
index fc514c8164..9020cf2975 100644
--- a/src/collector/facet_collector.rs
+++ b/src/collector/facet_collector.rs
@@ -67,10 +67,10 @@ fn facet_depth(facet_bytes: &[u8]) -> usize {
 /// (e.g. `/category/fiction`, `/category/biography`, `/category/personal_development`).
 ///
 /// Once collection is finished, you can harvest its results in the form
-/// of a `FacetCounts` object, and extract your face t counts from it.
+/// of a [`FacetCounts`] object, and extract your facet counts from it.
 ///
 /// This implementation assumes you are working with a number of facets that
-/// is much hundreds of time lower than your number of documents.
+/// is many hundreds of times smaller than your number of documents.
 ///
 ///
 /// ```rust
@@ -231,7 +231,7 @@ impl FacetCollector {
     ///
     /// Adding two facets within which one is the prefix of the other is forbidden.
     /// If you need the correct number of unique documents for two such facets,
-    /// just add them in separate `FacetCollector`.
+    /// just add them to separate `FacetCollector`s.
     pub fn add_facet<T>(&mut self, facet_from: T)
     where Facet: From<T> {
         let facet = Facet::from(facet_from);
@@ -391,7 +391,7 @@ impl<'a> Iterator for FacetChildIterator<'a> {
 
 impl FacetCounts {
     /// Returns an iterator over all of the facet count pairs inside this result.
-    /// See the documentation for [FacetCollector] for a usage example.
+    /// See the documentation for [`FacetCollector`] for a usage example.
     pub fn get<T>(&self, facet_from: T) -> FacetChildIterator<'_>
     where Facet: From<T> {
         let facet = Facet::from(facet_from);
@@ -410,7 +410,7 @@ impl FacetCounts {
     }
 
     /// Returns a vector of top `k` facets with their counts, sorted highest-to-lowest by counts.
-    /// See the documentation for [FacetCollector] for a usage example.
+    /// See the documentation for [`FacetCollector`] for a usage example.
     pub fn top_k<T>(&self, facet: T, k: usize) -> Vec<(&Facet, u64)>
     where Facet: From<T> {
         let mut heap = BinaryHeap::with_capacity(k);
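As context for the `facet_collector.rs` hunks, here is a hedged end-to-end sketch of the collector being documented. It assumes the tantivy API of this era (`add_facet_field`, `FacetCollector::for_field(Field)`, `FacetCounts::get`); the field name, facet paths, and memory budget are illustrative, not taken from the patch.

```rust
use tantivy::collector::FacetCollector;
use tantivy::query::AllQuery;
use tantivy::schema::{Facet, FacetOptions, Schema};
use tantivy::{doc, Index};

fn main() -> tantivy::Result<()> {
    let mut schema_builder = Schema::builder();
    let category = schema_builder.add_facet_field("category", FacetOptions::default());
    let schema = schema_builder.build();

    let index = Index::create_in_ram(schema);
    let mut index_writer = index.writer(50_000_000)?;
    index_writer.add_document(doc!(category => Facet::from("/category/fiction")))?;
    index_writer.add_document(doc!(category => Facet::from("/category/fiction")))?;
    index_writer.add_document(doc!(category => Facet::from("/category/biography")))?;
    index_writer.commit()?;

    let searcher = index.reader()?.searcher();
    // Count documents under each direct child of "/category".
    let mut facet_collector = FacetCollector::for_field(category);
    facet_collector.add_facet("/category");
    let facet_counts = searcher.search(&AllQuery, &facet_collector)?;
    for (facet, count) in facet_counts.get("/category") {
        println!("{}: {}", facet, count);
    }
    Ok(())
}
```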
diff --git a/src/collector/mod.rs b/src/collector/mod.rs
index 1597d7fe45..117d6c22be 100644
--- a/src/collector/mod.rs
+++ b/src/collector/mod.rs
@@ -4,9 +4,9 @@
 //! In tantivy jargon, we call this information your search "fruit".
 //!
 //! Your fruit could for instance be :
-//! - [the count of matching documents](./struct.Count.html)
-//! - [the top 10 documents, by relevancy or by a fast field](./struct.TopDocs.html)
-//! - [facet counts](./struct.FacetCollector.html)
+//! - [the count of matching documents](crate::collector::Count)
+//! - [the top 10 documents, by relevancy or by a fast field](crate::collector::TopDocs)
+//! - [facet counts](FacetCollector)
 //!
 //! At one point in your code, you will trigger the actual search operation by calling
 //! [the `search(...)` method of your `Searcher` object](../struct.Searcher.html#method.search).
diff --git a/src/core/index.rs b/src/core/index.rs
index 7c69089484..d0f89e1b04 100644
--- a/src/core/index.rs
+++ b/src/core/index.rs
@@ -78,8 +78,8 @@ fn save_new_metas(
 
 /// IndexBuilder can be used to create an index.
 ///
-/// Use in conjunction with `SchemaBuilder`. Global index settings
-/// can be configured with `IndexSettings`
+/// Use in conjunction with [`SchemaBuilder`][crate::schema::SchemaBuilder].
+/// Global index settings can be configured with [`IndexSettings`].
 ///
 /// # Examples
 ///
@@ -97,7 +97,13 @@ fn save_new_metas(
 /// );
 ///
 /// let schema = schema_builder.build();
-/// let settings = IndexSettings{sort_by_field: Some(IndexSortByField{field:"number".to_string(), order:Order::Asc}), ..Default::default()};
+/// let settings = IndexSettings{
+///     sort_by_field: Some(IndexSortByField{
+///         field: "number".to_string(),
+///         order: Order::Asc
+///     }),
+///     ..Default::default()
+/// };
 /// let index = Index::builder().schema(schema).settings(settings).create_in_ram();
 /// ```
 pub struct IndexBuilder {
@@ -140,7 +146,7 @@ impl IndexBuilder {
         self
     }
 
-    /// Creates a new index using the `RAMDirectory`.
+    /// Creates a new index using the [`RamDirectory`].
     ///
     /// The index will be allocated in anonymous memory.
     /// This should only be used for unit tests.
@@ -148,13 +154,14 @@ impl IndexBuilder {
         let ram_directory = RamDirectory::create();
         Ok(self
             .create(ram_directory)
-            .expect("Creating a RAMDirectory should never fail"))
+            .expect("Creating a RamDirectory should never fail"))
     }
 
     /// Creates a new index in a given filepath.
-    /// The index will use the `MMapDirectory`.
+    /// The index will use the [`MmapDirectory`].
     ///
-    /// If a previous index was in this directory, it returns an `IndexAlreadyExists` error.
+    /// If a previous index was in this directory, it returns a
+    /// [`TantivyError::IndexAlreadyExists`] error.
     #[cfg(feature = "mmap")]
     pub fn create_in_dir<P: AsRef<Path>>(self, directory_path: P) -> crate::Result<Index> {
         let mmap_directory: Box<dyn Directory> = Box::new(MmapDirectory::open(directory_path)?);
@@ -185,12 +192,13 @@ impl IndexBuilder {
     /// Creates a new index in a temp directory.
     ///
-    /// The index will use the `MMapDirectory` in a newly created directory.
-    /// The temp directory will be destroyed automatically when the `Index` object
+    /// The index will use the [`MmapDirectory`] in a newly created directory.
+    /// The temp directory will be destroyed automatically when the [`Index`] object
     /// is destroyed.
     ///
-    /// The temp directory is only used for testing the `MmapDirectory`.
-    /// For other unit tests, prefer the `RAMDirectory`, see: `create_in_ram`.
+    /// The temp directory is only used for testing the [`MmapDirectory`].
+    /// For other unit tests, prefer the [`RamDirectory`], see:
+    /// [`IndexBuilder::create_in_ram()`].
     #[cfg(feature = "mmap")]
     pub fn create_from_tempdir(self) -> crate::Result<Index> {
         let mmap_directory: Box<dyn Directory> = Box::new(MmapDirectory::create_from_tempdir()?);
@@ -286,7 +294,7 @@ impl Index {
         self.set_multithread_executor(default_num_threads)
     }
 
-    /// Creates a new index using the `RamDirectory`.
+    /// Creates a new index using the [`RamDirectory`].
     ///
     /// The index will be allocated in anonymous memory.
     /// This is useful for indexing small set of documents
@@ -296,9 +304,10 @@ impl Index {
     }
 
     /// Creates a new index in a given filepath.
-    /// The index will use the `MMapDirectory`.
+    /// The index will use the [`MmapDirectory`].
     ///
-    /// If a previous index was in this directory, then it returns an `IndexAlreadyExists` error.
+    /// If a previous index was in this directory, then it returns
+    /// a [`TantivyError::IndexAlreadyExists`] error.
     #[cfg(feature = "mmap")]
     pub fn create_in_dir<P: AsRef<Path>>(
         directory_path: P,
@@ -320,12 +329,13 @@ impl Index {
 
     /// Creates a new index in a temp directory.
     ///
-    /// The index will use the `MMapDirectory` in a newly created directory.
-    /// The temp directory will be destroyed automatically when the `Index` object
+    /// The index will use the [`MmapDirectory`] in a newly created directory.
+    /// The temp directory will be destroyed automatically when the [`Index`] object
     /// is destroyed.
     ///
-    /// The temp directory is only used for testing the `MmapDirectory`.
-    /// For other unit tests, prefer the `RamDirectory`, see: `create_in_ram`.
+    /// The temp directory is only used for testing the [`MmapDirectory`].
+    /// For other unit tests, prefer the [`RamDirectory`],
+    /// see: [`IndexBuilder::create_in_ram()`].
     #[cfg(feature = "mmap")]
     pub fn create_from_tempdir(schema: Schema) -> crate::Result<Index> {
         IndexBuilder::new().schema(schema).create_from_tempdir()
@@ -345,7 +355,7 @@ impl Index {
         builder.create(dir)
     }
 
-    /// Creates a new index given a directory and an `IndexMeta`.
+    /// Creates a new index given a directory and an [`IndexMeta`].
     fn open_from_metas(
         directory: ManagedDirectory,
         metas: &IndexMeta,
@@ -372,7 +382,7 @@ impl Index {
         &self.tokenizers
     }
 
-    /// Helper to access the tokenizer associated to a specific field.
+    /// Get the tokenizer associated with a specific field.
     pub fn tokenizer_for_field(&self, field: Field) -> crate::Result<TextAnalyzer> {
         let field_entry = self.schema.get_field_entry(field);
         let field_type = field_entry.field_type();
@@ -404,14 +414,14 @@ impl Index {
         })
     }
 
-    /// Create a default `IndexReader` for the given index.
+    /// Create a default [`IndexReader`] for the given index.
     ///
-    /// See [`Index.reader_builder()`](#method.reader_builder).
+    /// See [`Index::reader_builder()`].
     pub fn reader(&self) -> crate::Result<IndexReader> {
         self.reader_builder().try_into()
     }
 
-    /// Create a `IndexReader` for the given index.
+    /// Create an [`IndexReader`] for the given index.
     ///
-    /// Most project should create at most one reader for a given index.
+    /// Most projects should create at most one reader for a given index.
     /// This method is typically called only once per `Index` instance.
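To complement the `IndexSettings` doc example above, here is a self-contained sketch of creating a sorted index in RAM. Hedged: the `FAST | INDEXED` field options and the `Index::settings()` accessor are assumptions about the tantivy API of this era rather than anything this patch guarantees.

```rust
use tantivy::schema::{Schema, FAST, INDEXED};
use tantivy::{Index, IndexSettings, IndexSortByField, Order};

fn main() -> tantivy::Result<()> {
    let mut schema_builder = Schema::builder();
    // The sort field should be a fast field for index sorting to work.
    schema_builder.add_u64_field("number", FAST | INDEXED);
    let schema = schema_builder.build();

    // Same settings shape as the doc example in the hunk above.
    let settings = IndexSettings {
        sort_by_field: Some(IndexSortByField {
            field: "number".to_string(),
            order: Order::Asc,
        }),
        ..Default::default()
    };
    let index = Index::builder()
        .schema(schema)
        .settings(settings)
        .create_in_ram()?;
    assert!(index.settings().sort_by_field.is_some());
    Ok(())
}
```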
diff --git a/src/core/searcher.rs b/src/core/searcher.rs
index df77dcd771..7fcb8e2c0f 100644
--- a/src/core/searcher.rs
+++ b/src/core/searcher.rs
@@ -82,7 +82,7 @@ impl Searcher {
     /// Fetches a document from tantivy's store given a `DocAddress`.
     ///
     /// The searcher uses the segment ordinal to route the
-    /// the request to the right `Segment`.
+    /// request to the right `Segment`.
     pub fn doc(&self, doc_address: DocAddress) -> crate::Result<Document> {
         let store_reader = &self.inner.store_readers[doc_address.segment_ord as usize];
         store_reader.get(doc_address.doc_id)
diff --git a/src/directory/directory.rs b/src/directory/directory.rs
index 43d6ce5f4c..5b6dcdaf68 100644
--- a/src/directory/directory.rs
+++ b/src/directory/directory.rs
@@ -117,9 +117,9 @@ pub trait Directory: DirectoryClone + fmt::Debug + Send + Sync + 'static {
     /// change.
     ///
     /// Specifically, subsequent writes or flushes should
-    /// have no effect on the returned `FileSlice` object.
+    /// have no effect on the returned [`FileSlice`] object.
     ///
-    /// You should only use this to read files create with [Directory::open_write].
+    /// You should only use this to read files created with [`Directory::open_write()`].
     fn open_read(&self, path: &Path) -> Result<FileSlice, OpenReadError> {
         let file_handle = self.get_file_handle(path)?;
         Ok(FileSlice::new(file_handle))
@@ -128,27 +128,28 @@ pub trait Directory: DirectoryClone + fmt::Debug + Send + Sync + 'static {
     /// Removes a file
     ///
     /// Removing a file will not affect an eventual
-    /// existing FileSlice pointing to it.
+    /// existing [`FileSlice`] pointing to it.
     ///
-    /// Removing a nonexistent file, yields a
-    /// `DeleteError::DoesNotExist`.
+    /// Removing a nonexistent file returns a
+    /// [`DeleteError::FileDoesNotExist`].
     fn delete(&self, path: &Path) -> Result<(), DeleteError>;
 
     /// Returns true if and only if the file exists
     fn exists(&self, path: &Path) -> Result<bool, OpenReadError>;
 
     /// Opens a writer for the *virtual file* associated with
-    /// a Path.
+    /// a [`Path`].
     ///
     /// Right after this call, for the span of the execution of the program
-    /// the file should be created and any subsequent call to `open_read` for the
-    /// same path should return a `FileSlice`.
+    /// the file should be created and any subsequent call to
+    /// [`Directory::open_read()`] for the same path should return
+    /// a [`FileSlice`].
     ///
     /// However, depending on the directory implementation,
-    /// it might be required to call `sync_directory` to ensure
+    /// it might be required to call [`Directory::sync_directory()`] to ensure
     /// that the file is durably created.
     /// (The semantics here are the same when dealing with
-    /// a posix filesystem.)
+    /// a POSIX filesystem.)
     ///
     /// Write operations may be aggressively buffered.
     /// The client of this trait is responsible for calling flush
@@ -157,19 +158,19 @@ pub trait Directory: DirectoryClone + fmt::Debug + Send + Sync + 'static {
     ///
     /// Flush operation should also be persistent.
     ///
-    /// The user shall not rely on `Drop` triggering `flush`.
-    /// Note that `RamDirectory` will panic! if `flush`
-    /// was not called.
+    /// The user shall not rely on [`Drop`] triggering `flush`.
+    /// Note that [`RamDirectory`][crate::directory::RamDirectory] will
+    /// panic! if `flush` was not called.
     ///
     /// The file may not previously exist.
     fn open_write(&self, path: &Path) -> Result<WritePtr, OpenWriteError>;
 
     /// Reads the full content file that has been written using
-    /// atomic_write.
+    /// [`Directory::atomic_write()`].
     ///
     /// This should only be used for small files.
     ///
-    /// You should only use this to read files create with [Directory::atomic_write].
+    /// You should only use this to read files created with [`Directory::atomic_write()`].
     fn atomic_read(&self, path: &Path) -> Result<Vec<u8>, OpenReadError>;
 
     /// Atomically replace the content of a file with data.
@@ -188,7 +189,7 @@ pub trait Directory: DirectoryClone + fmt::Debug + Send + Sync + 'static {
 
     /// Acquire a lock in the given directory.
     ///
-    /// The method is blocking or not depending on the `Lock` object.
+    /// The method is blocking or not depending on the [`Lock`] object.
     fn acquire_lock(&self, lock: &Lock) -> Result<DirectoryLock, LockError> {
         let mut box_directory = self.box_clone();
         let mut retry_policy = retry_policy(lock.is_blocking);
@@ -210,15 +211,15 @@ pub trait Directory: DirectoryClone + fmt::Debug + Send + Sync + 'static {
     }
 
     /// Registers a callback that will be called whenever a change on the `meta.json`
-    /// using the `atomic_write` API is detected.
+    /// using the [`Directory::atomic_write()`] API is detected.
     ///
-    /// The behavior when using `.watch()` on a file using [Directory::open_write] is, on the other
-    /// hand, undefined.
+    /// The behavior when using `.watch()` on a file using [`Directory::open_write()`] is, on the
+    /// other hand, undefined.
     ///
     /// The file will be watched for the lifetime of the returned `WatchHandle`. The caller is
     /// required to keep it.
-    /// It does not override previous callbacks. When the file is modified, all callback that are
-    /// registered (and whose `WatchHandle` is still alive) are triggered.
+    /// It does not override previous callbacks. When the file is modified, all callbacks that are
+    /// registered (and whose [`WatchHandle`] is still alive) are triggered.
     ///
     /// Internally, tantivy only uses this API to detect new commits to implement the
     /// `OnCommit` `ReloadPolicy`. Not implementing watch in a `Directory` only prevents the
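The `Directory` contract above distinguishes buffered stream writes (`open_write`, which must be flushed or terminated before reading) from the `atomic_write`/`atomic_read` pair meant for small files. A hedged sketch using the `RamDirectory`; it assumes `RamDirectory` and `TerminatingWrite` are re-exported from `tantivy::directory` as in this era of the crate, and the file names are illustrative.

```rust
use std::io::Write;
use std::path::Path;

use tantivy::directory::{Directory, RamDirectory, TerminatingWrite};

fn main() -> tantivy::Result<()> {
    let directory = RamDirectory::create();

    // Stream-style write: buffered, and must be terminated (or at least
    // flushed); per the docs above, RamDirectory panics on unflushed drops.
    let path = Path::new("segment.data");
    let mut wrt = directory.open_write(path)?;
    wrt.write_all(b"hello")?;
    wrt.terminate()?;

    // Subsequent open_read calls see the terminated file.
    let slice = directory.open_read(path)?;
    assert_eq!(&slice.read_bytes()?[..], &b"hello"[..]);

    // Atomic write/read pair, intended for small files such as meta.json.
    directory.atomic_write(Path::new("meta.json"), b"{}")?;
    assert_eq!(directory.atomic_read(Path::new("meta.json"))?, b"{}");
    Ok(())
}
```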
diff --git a/src/directory/directory_lock.rs b/src/directory/directory_lock.rs
index c57542a1ad..03a8c112e5 100644
--- a/src/directory/directory_lock.rs
+++ b/src/directory/directory_lock.rs
@@ -8,8 +8,8 @@ use once_cell::sync::Lazy;
 /// [`LockParams`](./enum.LockParams.html).
-/// Tantivy itself uses only two locks but client application
+/// Tantivy itself uses only two locks but client applications
 /// can use the directory facility to define their own locks.
-/// - [INDEX_WRITER_LOCK]
-/// - [META_LOCK]
+/// - [`INDEX_WRITER_LOCK`]
+/// - [`META_LOCK`]
 ///
-/// Check out these locks documentation for more information.
+/// Check out these locks' documentation for more information.
 #[derive(Debug)]
@@ -30,7 +30,8 @@ pub struct Lock {
 }
 
 /// Only one process should be able to write tantivy's index at a time.
-/// This lock file, when present, is in charge of preventing other processes to open an IndexWriter.
+/// This lock file, when present, is in charge of preventing other processes from opening an
+/// `IndexWriter`.
 ///
 /// If the process is killed and this file remains, it is safe to remove it manually.
 ///
diff --git a/src/directory/mmap_directory.rs b/src/directory/mmap_directory.rs
index 6d12393710..7b72380934 100644
--- a/src/directory/mmap_directory.rs
+++ b/src/directory/mmap_directory.rs
@@ -56,10 +56,10 @@ fn open_mmap(full_path: &Path) -> result::Result<Option<Mmap>, OpenReadError> {
 
 #[derive(Default, Clone, Debug, Serialize, Deserialize)]
 pub struct CacheCounters {
-    // Number of time the cache prevents to call `mmap`
+    /// Number of times the cache prevented a call to `mmap`.
     pub hit: usize,
-    // Number of time tantivy had to call `mmap`
-    // as no entry was in the cache.
+    /// Number of times tantivy had to call `mmap`
+    /// as no entry was in the cache.
     pub miss: usize,
 }
diff --git a/src/directory/ram_directory.rs b/src/directory/ram_directory.rs
index 063c96150b..85d6945382 100644
--- a/src/directory/ram_directory.rs
+++ b/src/directory/ram_directory.rs
@@ -15,7 +15,7 @@ use crate::directory::{
     WatchHandle, WritePtr,
 };
 
-/// Writer associated with the `RamDirectory`
+/// Writer associated with the [`RamDirectory`].
 ///
 /// The Writer just writes a buffer.
 struct VecWriter {
@@ -137,17 +137,17 @@ impl RamDirectory {
     }
 
     /// Returns the sum of the size of the different files
-    /// in the RamDirectory.
+    /// in the [`RamDirectory`].
     pub fn total_mem_usage(&self) -> usize {
         self.fs.read().unwrap().total_mem_usage()
     }
 
-    /// Write a copy of all of the files saved in the RamDirectory in the target `Directory`.
+    /// Write a copy of all of the files saved in the [`RamDirectory`] to the target [`Directory`].
     ///
-    /// Files are all written using the `Directory::write` meaning, even if they were
-    /// written using the `atomic_write` api.
+    /// Files are all written using [`Directory::open_write()`], even if they were
+    /// originally written using the [`Directory::atomic_write()`] API.
     ///
-    /// If an error is encounterred, files may be persisted partially.
+    /// If an error is encountered, files may be persisted partially.
     pub fn persist(&self, dest: &dyn Directory) -> crate::Result<()> {
         let wlock = self.fs.write().unwrap();
         for (path, file) in wlock.fs.iter() {
diff --git a/src/fastfield/bytes/writer.rs b/src/fastfield/bytes/writer.rs
index 84f42cd05a..61085b230a 100644
--- a/src/fastfield/bytes/writer.rs
+++ b/src/fastfield/bytes/writer.rs
@@ -13,15 +13,17 @@ use crate::DocId;
 /// This `BytesFastFieldWriter` is only useful for advanced users.
 /// The normal way to get your associated bytes in your index
 /// is to
-/// - declare your field with fast set to `Cardinality::SingleValue`
+/// - declare your field with fast set to
+///   [`Cardinality::SingleValue`](crate::schema::Cardinality::SingleValue)
 ///   in your schema
 /// - add your document simply by calling `.add_document(...)` with associating bytes to the field.
 ///
 /// The `BytesFastFieldWriter` can be acquired from the
 /// fast field writer by calling
-/// [`.get_bytes_writer(...)`](./struct.FastFieldsWriter.html#method.get_bytes_writer).
+/// [`.get_bytes_writer_mut(...)`](crate::fastfield::FastFieldsWriter::get_bytes_writer_mut).
 ///
-/// Once acquired, writing is done by calling `.add_document_val(&[u8])`
+/// Once acquired, writing is done by calling
+/// [`.add_document_val(&[u8])`](BytesFastFieldWriter::add_document_val)
 /// once per document, even if there are no bytes associated to it.
 pub struct BytesFastFieldWriter {
     field: Field,
diff --git a/src/indexer/index_writer.rs b/src/indexer/index_writer.rs
index 08960a3115..70f89490e8 100644
--- a/src/indexer/index_writer.rs
+++ b/src/indexer/index_writer.rs
@@ -31,7 +31,7 @@ pub const MARGIN_IN_BYTES: usize = 1_000_000;
 pub const MEMORY_ARENA_NUM_BYTES_MIN: usize = ((MARGIN_IN_BYTES as u32) * 3u32) as usize;
 pub const MEMORY_ARENA_NUM_BYTES_MAX: usize = u32::MAX as usize - MARGIN_IN_BYTES;
 
-// We impose the number of index writer thread to be at most this.
+// We limit the number of index writer threads to at most this.
 pub const MAX_NUM_THREAD: usize = 8;
 
 // Add document will block if the number of docs waiting in the queue to be indexed
@@ -40,7 +40,7 @@ const PIPELINE_MAX_SIZE_IN_DOCS: usize = 10_000;
 
 fn error_in_index_worker_thread(context: &str) -> TantivyError {
     TantivyError::ErrorInThread(format!(
-        "{}. A worker thread encounterred an error (io::Error most likely) or panicked.",
+        "{}. A worker thread encountered an error (io::Error most likely) or panicked.",
         context
     ))
 }
@@ -49,7 +49,7 @@
 ///
-/// It manages a small number of indexing thread, as well as a shared
+/// It manages a small number of indexing threads, as well as a shared
 /// indexing queue.
-/// Each indexing thread builds its own independent `Segment`, via
+/// Each indexing thread builds its own independent [`Segment`], via
 /// a `SegmentWriter` object.
 pub struct IndexWriter {
     // the lock is just used to bind the
@@ -385,8 +385,8 @@
             .operation_receiver()
             .ok_or_else(|| {
                 crate::TantivyError::ErrorInThread(
-                    "The index writer was killed. It can happen if an indexing worker \
-                     encounterred an Io error for instance."
+                    "The index writer was killed. It can happen if an indexing worker encountered \
+                     an Io error for instance."
                         .to_string(),
                 )
             })
@@ -595,14 +595,14 @@ impl IndexWriter {
     /// * `.commit()`: to accept this commit
     /// * `.abort()`: to cancel this commit.
     ///
-    /// In the current implementation, `PreparedCommit` borrows
-    /// the `IndexWriter` mutably so we are guaranteed that no new
+    /// In the current implementation, [`PreparedCommit`] borrows
+    /// the [`IndexWriter`] mutably so we are guaranteed that no new
     /// document can be added as long as it is committed or is
     /// dropped.
     ///
     /// It is also possible to add a payload to the `commit`
     /// using this API.
-    /// See [`PreparedCommit::set_payload()`](PreparedCommit.html)
+    /// See [`PreparedCommit::set_payload()`].
     pub fn prepare_commit(&mut self) -> crate::Result<PreparedCommit> {
         // Here, because we join all of the worker threads,
         // all of the segment update for this commit have been
diff --git a/src/positions/mod.rs b/src/positions/mod.rs
index 45495e8e62..f0d5d55500 100644
--- a/src/positions/mod.rs
+++ b/src/positions/mod.rs
@@ -3,7 +3,7 @@
 //! In "The beauty and the beast", the term "the" appears in position 0 and position 3.
 //! This information is useful to run phrase queries.
 //!
-//! The [position](../enum.SegmentComponent.html#variant.Positions) file contains all of the
+//! The [position](crate::SegmentComponent::Positions) file contains all of the
 //! bitpacked positions delta, for all terms of a given field, one term after the other.
 //!
 //! Each term is encoded independently.
diff --git a/src/postings/block_search.rs b/src/postings/block_search.rs
index 3a53bd78f8..0fb21239cb 100644
--- a/src/postings/block_search.rs
+++ b/src/postings/block_search.rs
@@ -12,7 +12,7 @@ use crate::postings::compression::COMPRESSION_BLOCK_SIZE;
 /// ```
 ///
 /// the `start` argument is just used to hint that the response is
-/// greater than beyond `start`. the implementation may or may not use
+/// greater than or equal to `start`. The implementation may or may not use
 /// it for optimization.
 ///
 /// # Assumption
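The two-phase `prepare_commit` flow documented in the `src/indexer/index_writer.rs` hunks above is easiest to see in code. A hedged sketch — the schema, field name, memory budget, and payload string are invented for illustration; the `PreparedCommit` methods are the ones named in the hunk.

```rust
use tantivy::schema::{Schema, TEXT};
use tantivy::{doc, Index};

fn main() -> tantivy::Result<()> {
    let mut schema_builder = Schema::builder();
    let body = schema_builder.add_text_field("body", TEXT);
    let index = Index::create_in_ram(schema_builder.build());

    let mut index_writer = index.writer(50_000_000)?;
    index_writer.add_document(doc!(body => "hello world"))?;

    // Phase 1: flush segments and stage the commit without publishing it.
    let mut prepared = index_writer.prepare_commit()?;
    prepared.set_payload("checkpoint-42"); // optional opaque payload
    // Phase 2: publish the staged commit (or call .abort() to cancel).
    prepared.commit()?;
    Ok(())
}
```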
diff --git a/src/query/phrase_query/phrase_query.rs b/src/query/phrase_query/phrase_query.rs
index 4bfd0f0b77..969920aef2 100644
--- a/src/query/phrase_query/phrase_query.rs
+++ b/src/query/phrase_query/phrase_query.rs
@@ -72,7 +72,7 @@ impl PhraseQuery {
         self.slop = value;
     }
 
-    /// The `Field` this `PhraseQuery` is targeting.
+    /// The [`Field`] this `PhraseQuery` is targeting.
     pub fn field(&self) -> Field {
         self.field
     }
@@ -85,10 +85,10 @@ impl PhraseQuery {
             .collect::<Vec<Term>>()
     }
 
-    /// Returns the `PhraseWeight` for the given phrase query given a specific `searcher`.
+    /// Returns the [`PhraseWeight`] for this phrase query, given a specific `searcher`.
     ///
-    /// This function is the same as `.weight(...)` except it returns
-    /// a specialized type `PhraseWeight` instead of a Boxed trait.
+    /// This function is the same as [`Query::weight()`] except it returns
+    /// a specialized type [`PhraseWeight`] instead of a boxed trait object.
     pub(crate) fn phrase_weight(
         &self,
         searcher: &Searcher,
@@ -121,7 +121,7 @@ impl PhraseQuery {
 impl Query for PhraseQuery {
     /// Create the weight associated to a query.
     ///
-    /// See [`Weight`](./trait.Weight.html).
+    /// See [`Weight`].
     fn weight(&self, searcher: &Searcher, scoring_enabled: bool) -> crate::Result<Box<dyn Weight>> {
         let phrase_weight = self.phrase_weight(searcher, scoring_enabled)?;
         Ok(Box::new(phrase_weight))
diff --git a/src/query/weight.rs b/src/query/weight.rs
index 3a2ff3d33c..4dffea977e 100644
--- a/src/query/weight.rs
+++ b/src/query/weight.rs
@@ -41,10 +41,12 @@ pub(crate) fn for_each_pruning_scorer(
     }
 }
 
-/// A Weight is the specialization of a Query
+/// A Weight is the specialization of a `Query`
 /// for a given set of segments.
 ///
-/// See [`Query`](./trait.Query.html).
+/// See [`Query`].
+///
+/// [`Query`]: crate::query::Query
 pub trait Weight: Send + Sync + 'static {
     /// Returns the scorer for the given segment.
     ///
diff --git a/src/reader/mod.rs b/src/reader/mod.rs
index 14d138e8a9..aafc60df04 100644
--- a/src/reader/mod.rs
+++ b/src/reader/mod.rs
@@ -23,7 +23,7 @@ pub enum ReloadPolicy {
     /// The index is entirely reloaded manually.
     /// All updates of the index should be manual.
     ///
-    /// No change is reflected automatically. You are required to call `IndexReader::reload()`
+    /// No change is reflected automatically. You are required to call [`IndexReader::reload()`]
     /// manually.
     Manual,
     /// The index is reloaded within milliseconds after a new commit is available.
     /// This is made possible by watching changes in the `meta.json` file.
     OnCommit, // TODO add NEAR_REAL_TIME(target_ms)
 }
 
-/// [IndexReader] builder
+/// [`IndexReader`] builder
 ///
 /// It makes it possible to configure:
-/// - [ReloadPolicy] defining when new index versions are detected
-/// - [Warmer] implementations
+/// - [`ReloadPolicy`] defining when new index versions are detected
+/// - [`Warmer`] implementations
 /// - number of warming threads, for parallelizing warming work
 /// - The cache size of the underlying doc store readers.
 #[derive(Clone)]
@@ -108,7 +108,7 @@ impl IndexReaderBuilder {
 
     /// Sets the reload_policy.
     ///
-    /// See [`ReloadPolicy`](./enum.ReloadPolicy.html) for more details.
+    /// See [`ReloadPolicy`] for more details.
     #[must_use]
     pub fn reload_policy(mut self, reload_policy: ReloadPolicy) -> IndexReaderBuilder {
         self.reload_policy = reload_policy;
@@ -133,8 +133,8 @@ impl IndexReaderBuilder {
 
     /// Sets the number of warming threads.
     ///
-    /// This allows parallelizing warming work when there are multiple [Warmer] registered with the
-    /// [IndexReader].
+    /// This allows parallelizing warming work when there are multiple [`Warmer`]s registered with
+    /// the [`IndexReader`].
     #[must_use]
     pub fn num_warming_threads(mut self, num_warming_threads: usize) -> IndexReaderBuilder {
         self.num_warming_threads = num_warming_threads;
@@ -186,7 +186,7 @@ impl InnerIndexReader {
             searcher_generation_inventory,
         })
     }
-    /// Opens the freshest segments `SegmentReader`.
+    /// Opens a [`SegmentReader`] for each of the freshest segments.
     ///
-    /// This function acquires a lot to prevent GC from removing files
+    /// This function acquires a lock to prevent GC from removing files
     /// as we are opening our index.
@@ -264,7 +264,7 @@
 /// you instances of `Searcher` for the last loaded version.
 ///
-/// `Clone` does not clone the different pool of searcher. `IndexReader`
-/// just wraps and `Arc`.
+/// `Clone` does not clone the different pool of searchers. `IndexReader`
+/// just wraps an `Arc`.
 #[derive(Clone)]
 pub struct IndexReader {
     inner: Arc<InnerIndexReader>,
@@ -280,7 +280,7 @@ impl IndexReader {
     /// Update searchers so that they reflect the state of the last
     /// `.commit()`.
     ///
-    /// If you set up the `OnCommit` `ReloadPolicy` (which is the default)
+    /// If you set up the [`ReloadPolicy::OnCommit`] (which is the default)
     /// every commit should be rapidly reflected on your `IndexReader` and you should
     /// not need to call `reload()` at all.
     ///
diff --git a/src/store/mod.rs b/src/store/mod.rs
index 2c90f3145d..73ecf92f9d 100644
--- a/src/store/mod.rs
+++ b/src/store/mod.rs
@@ -27,8 +27,7 @@
 //!
 //! - at the segment level, the
 //!   [`SegmentReader`'s `doc` method](../struct.SegmentReader.html#method.doc)
-//! - at the index level, the
-//!   [`Searcher`'s `doc` method](../struct.Searcher.html#method.doc)
+//! - at the index level, the [`Searcher::doc()`](crate::Searcher::doc) method
 //!
 //! !
diff --git a/src/tokenizer/mod.rs b/src/tokenizer/mod.rs
index 00d4241969..925cd5059b 100644
--- a/src/tokenizer/mod.rs
+++ b/src/tokenizer/mod.rs
@@ -152,7 +152,7 @@ pub use self::whitespace_tokenizer::WhitespaceTokenizer;
 
 /// Maximum authorized len (in bytes) for a token.
 ///
-/// Tokenizer are in charge of not emitting tokens larger than this value.
+/// Tokenizers are in charge of not emitting tokens larger than this value.
 /// Currently, if a faulty tokenizer implementation emits tokens with a length larger than
 /// `2^16 - 1 - 5`, the token will simply be ignored downstream.
 pub const MAX_TOKEN_LEN: usize = u16::MAX as usize - 5;
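Finally, a hedged sketch of the reload behavior discussed in the `src/reader/mod.rs` hunks: with `ReloadPolicy::Manual`, a commit stays invisible to the reader until `reload()` is called. The schema and assertions are illustrative; `num_docs()` and the builder calls reflect the tantivy API of this era.

```rust
use tantivy::schema::{Schema, TEXT};
use tantivy::{doc, Index, IndexReader, ReloadPolicy};

fn main() -> tantivy::Result<()> {
    let mut schema_builder = Schema::builder();
    let body = schema_builder.add_text_field("body", TEXT);
    let index = Index::create_in_ram(schema_builder.build());

    // Manual policy: the reader only sees new commits after an explicit reload().
    let reader: IndexReader = index
        .reader_builder()
        .reload_policy(ReloadPolicy::Manual)
        .try_into()?;

    let mut index_writer = index.writer(50_000_000)?;
    index_writer.add_document(doc!(body => "hello"))?;
    index_writer.commit()?;

    assert_eq!(reader.searcher().num_docs(), 0); // stale until reloaded
    reader.reload()?;
    assert_eq!(reader.searcher().num_docs(), 1);
    Ok(())
}
```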