Skip to content

Commit

Permalink
NOBUG Added setting merge policy.
Browse files Browse the repository at this point in the history
  • Loading branch information
fulmicoton committed Nov 27, 2016
1 parent cefb09c commit dff022b
Show file tree
Hide file tree
Showing 6 changed files with 84 additions and 14 deletions.
35 changes: 33 additions & 2 deletions src/indexer/index_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use core::SerializableSegment;
use core::Index;
use core::Segment;
use std::thread::JoinHandle;
use indexer::{MergePolicy, DefaultMergePolicy};
use indexer::SegmentWriter;
use super::directory_lock::DirectoryLock;
use std::clone::Clone;
Expand All @@ -15,6 +16,7 @@ use indexer::merger::IndexMerger;
use core::SegmentId;
use datastruct::stacker::Heap;
use std::mem::swap;
use std::sync::{Arc, Mutex};
use chan;
use core::SegmentMeta;
use super::segment_updater::{SegmentUpdater, SegmentUpdate, SegmentUpdateSender};
Expand Down Expand Up @@ -53,6 +55,8 @@ pub struct IndexWriter {
// lifetime of the lock with that of the IndexWriter.
_directory_lock: DirectoryLock,

_merge_policy: Arc<Mutex<Box<MergePolicy>>>,

index: Index,
heap_size_in_bytes_per_thread: usize,

Expand Down Expand Up @@ -204,12 +208,16 @@ impl IndexWriter {
let (document_sender, document_receiver): (DocumentSender, DocumentReceiver) =
chan::sync(PIPELINE_MAX_SIZE_IN_DOCS);

let (segment_update_sender, segment_update_thread) = SegmentUpdater::start_updater(index.clone());
let merge_policy: Arc<Mutex<Box<MergePolicy>>> = Arc::new(Mutex::new(box DefaultMergePolicy::default()));

let (segment_update_sender, segment_update_thread) = SegmentUpdater::start_updater(index.clone(), merge_policy.clone());

let mut index_writer = IndexWriter {

_directory_lock: directory_lock,

_merge_policy: merge_policy,

heap_size_in_bytes_per_thread: heap_size_in_bytes_per_thread,
index: index.clone(),

Expand All @@ -229,7 +237,18 @@ impl IndexWriter {
try!(index_writer.start_workers());
Ok(index_writer)
}



/// Returns a boxed copy of the merge policy currently held by this writer.
///
/// Panics if the mutex guarding the merge policy is poisoned.
pub fn get_merge_policy(&self) -> Box<MergePolicy> {
    let policy_guard = self._merge_policy.lock().unwrap();
    policy_guard.box_clone()
}

/// Replaces the merge policy held by this writer with `merge_policy`.
///
/// Panics if the mutex guarding the merge policy is poisoned.
pub fn set_merge_policy(&self, merge_policy: Box<MergePolicy>) {
    let mut policy_slot = self._merge_policy.lock().unwrap();
    *policy_slot = merge_policy;
}

fn start_workers(&mut self) -> Result<()> {
for _ in 0..self.num_threads {
try!(self.add_indexing_worker());
Expand Down Expand Up @@ -445,6 +464,7 @@ mod tests {
use Index;
use Term;
use Error;
use indexer::NoMergePolicy;

#[test]
fn test_lockfile_stops_duplicates() {
Expand All @@ -456,6 +476,17 @@ mod tests {
_ => panic!("Expected FileAlreadyExists error"),
}
}

#[test]
fn test_set_merge_policy() {
    // The schema comes straight from a default builder: only the writer's
    // merge policy is inspected here.
    let schema_builder = schema::SchemaBuilder::default();
    let index = Index::create_in_ram(schema_builder.build());
    let index_writer = index.writer(40_000_000).unwrap();

    // A freshly created writer reports the default (log) merge policy.
    let default_policy_repr = format!("{:?}", index_writer.get_merge_policy());
    assert_eq!(default_policy_repr, "LogMergePolicy { min_merge_size: 8, min_layer_size: 10000, level_log_size: 0.75 }");

    // After swapping the policy, the getter reflects the new one.
    let no_merge_policy = box NoMergePolicy::default();
    index_writer.set_merge_policy(no_merge_policy);
    let replaced_policy_repr = format!("{:?}", index_writer.get_merge_policy());
    assert_eq!(replaced_policy_repr, "NoMergePolicy");
}

#[test]
fn test_lockfile_released_on_drop() {
Expand Down
11 changes: 9 additions & 2 deletions src/indexer/log_merge_policy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ const DEFAULT_LEVEL_LOG_SIZE: f64 = 0.75;
const DEFAULT_MIN_LAYER_SIZE: u32 = 10_000;
const DEFAULT_MIN_MERGE_SIZE: usize = 8;


/// LogMergePolicy tries to merge segments that have a similar number of
/// documents.
#[derive(Debug, Clone)]
pub struct LogMergePolicy {
min_merge_size: usize,
min_layer_size: u32,
Expand All @@ -20,7 +24,7 @@ impl LogMergePolicy {
}

/// Set the minimum number of segments that may be merged together.
pub fn set_min_merge_size(&mut self, min_merge_size: usize) {
pub fn set_min_merge_size(&mut self, min_merge_size: usize) {
self.min_merge_size = min_merge_size;
}

Expand All @@ -30,7 +34,6 @@ impl LogMergePolicy {
self.min_layer_size = min_layer_size;
}


/// Set the ratio between two consecutive levels.
///
/// Segments are grouped in levels according to their sizes.
Expand Down Expand Up @@ -83,6 +86,10 @@ impl MergePolicy for LogMergePolicy {

result
}

/// Returns a boxed copy of this policy as a `MergePolicy` trait object.
fn box_clone(&self) -> Box<MergePolicy> {
    Box::new(self.clone())
}
}

impl Default for LogMergePolicy {
Expand Down
22 changes: 20 additions & 2 deletions src/indexer/merge_policy.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,30 @@
use core::SegmentId;
use core::SegmentMeta;
use std::marker;
use std::fmt::Debug;


/// Set of segments suggested for a merge.
#[derive(Debug, Clone)]
pub struct MergeCandidate(pub Vec<SegmentId>);

pub trait MergePolicy: marker::Send {

/// The Merge policy defines which segments should be merged.
///
/// Every time the list of segments changes, the segment updater
/// asks the merge policy if some segments should be merged.
pub trait MergePolicy: marker::Send + Debug {
/// Given the list of segment metas, returns the list of merge candidates.
///
/// Each returned `MergeCandidate` wraps the ids of segments suggested
/// for a single merge.
///
/// This call happens on the segment updater thread, and will block
/// other segment updates, so implementations should return quickly.
fn compute_merge_candidates(&self, segments: &[SegmentMeta]) -> Vec<MergeCandidate>;
/// Returns a boxed clone of the MergePolicy.
///
/// This lets a `Box<MergePolicy>` trait object be duplicated without
/// knowing the concrete type behind it.
fn box_clone(&self) -> Box<MergePolicy>;
}

/// Merge policy that never merges segments.
///
/// Its `compute_merge_candidates` implementation always returns an
/// empty list of candidates.
#[derive(Debug)]
pub struct NoMergePolicy;

impl Default for NoMergePolicy {
Expand All @@ -18,10 +33,13 @@ impl Default for NoMergePolicy {
}
}


impl MergePolicy for NoMergePolicy {
fn compute_merge_candidates(&self, _segments: &[SegmentMeta]) -> Vec<MergeCandidate> {
Vec::new()
}

fn box_clone(&self) -> Box<MergePolicy> {
box NoMergePolicy
}
}

2 changes: 2 additions & 0 deletions src/indexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,6 @@ pub use self::log_merge_policy::LogMergePolicy;
pub use self::merge_policy::{NoMergePolicy, MergeCandidate, MergePolicy};
pub use self::segment_manager::SegmentManager;


/// Alias for the default merge policy, which is the LogMergePolicy.
pub type DefaultMergePolicy = LogMergePolicy;
18 changes: 10 additions & 8 deletions src/indexer/segment_updater.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,13 @@

use chan;
use core::Index;
use std::sync::Mutex;
use core::Segment;
use core::SegmentId;
use core::SegmentMeta;
use std::mem;
use core::SerializableSegment;
use indexer::{DefaultMergePolicy, MergePolicy};
use indexer::MergePolicy;
use indexer::MergeCandidate;
use indexer::merger::IndexMerger;
use indexer::SegmentSerializer;
Expand Down Expand Up @@ -135,20 +136,20 @@ pub struct SegmentUpdater {
segment_update_receiver: SegmentUpdateReceiver,
segment_update_sender: SegmentUpdateSender,
segment_manager_arc: Arc<SegmentManager>,
merge_policy: Box<MergePolicy>,
merge_policy: Arc<Mutex<Box<MergePolicy>>>,
merging_thread_id: usize,
merging_threads: HashMap<usize, JoinHandle<(Vec<SegmentId>, SegmentMeta)> >,
}


impl SegmentUpdater {

pub fn start_updater(index: Index) -> (SegmentUpdateSender, JoinHandle<()>) {
let segment_updater = SegmentUpdater::new(index);
/// Creates a `SegmentUpdater` for `index` that uses the shared
/// `merge_policy`, starts it, and returns a sender for segment updates
/// together with the `JoinHandle` returned by `start()`.
pub fn start_updater(index: Index, merge_policy: Arc<Mutex<Box<MergePolicy>>>) -> (SegmentUpdateSender, JoinHandle<()>) {
let segment_updater = SegmentUpdater::new(index, merge_policy);
// The sender is cloned out of the updater before `start()` is called on it.
(segment_updater.segment_update_sender.clone(), segment_updater.start())
}

fn new(index: Index) -> SegmentUpdater {
fn new(index: Index, merge_policy: Arc<Mutex<Box<MergePolicy>>>) -> SegmentUpdater {
let segment_manager_arc = get_segment_manager(&index);
let (segment_update_sender, segment_update_receiver): (SegmentUpdateSender, SegmentUpdateReceiver) = chan::async();
SegmentUpdater {
Expand All @@ -157,7 +158,7 @@ impl SegmentUpdater {
segment_update_sender: segment_update_sender,
segment_update_receiver: segment_update_receiver,
segment_manager_arc: segment_manager_arc,
merge_policy: Box::new(DefaultMergePolicy::default()), // TODO make that configurable
merge_policy: merge_policy,
merging_thread_id: 0,
merging_threads: HashMap::new(),
}
Expand Down Expand Up @@ -236,8 +237,9 @@ impl SegmentUpdater {
let (committed_segments, uncommitted_segments) = get_segment_ready_for_commit(segment_manager);
// Committed segments cannot be merged with uncommitted_segments.
// We therefore consider merges using these two sets of segments independently.
let mut merge_candidates = self.merge_policy.compute_merge_candidates(&uncommitted_segments);
let committed_merge_candidates = self.merge_policy.compute_merge_candidates(&committed_segments);
let merge_policy_lock = self.merge_policy.lock().unwrap();
let mut merge_candidates = merge_policy_lock.compute_merge_candidates(&uncommitted_segments);
let committed_merge_candidates = merge_policy_lock.compute_merge_candidates(&committed_segments);
merge_candidates.extend_from_slice(&committed_merge_candidates[..]);
merge_candidates
}
Expand Down
10 changes: 10 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,16 @@ pub use postings::Postings;
pub use postings::SegmentPostingsOption;



/// Tantivy's makes it possible to personalize when
/// the indexer should merge its segments
pub mod merge_policy {
pub use indexer::MergePolicy;
pub use indexer::LogMergePolicy;
pub use indexer::NoMergePolicy;
pub use indexer::DefaultMergePolicy;
}

/// u32 identifying a document within a segment.
/// Documents have their doc id assigned incrementally,
/// as they are added in the segment.
Expand Down

0 comments on commit dff022b

Please sign in to comment.