Skip to content

Commit

Permalink
feat: write manifests in background tasks (#3709)
Browse files Browse the repository at this point in the history
* chore: truncate wip

* feat: truncate and edit write manifest in background

* refactor: wrap in manifest context

* feat: alter write manifest in background

* chore: fix compiler errors

* feat: flush update manifest in background

* feat: compaction update manifest in background

* feat: set dropping state

* feat: reset drop state

* feat: check state before updating manifest

* test: fix compaction test

* refactor: rename method

* chore: update comment

* chore: discard state guard

* refactor: use atomic cell to store state enum

* chore: fix clippy

* chore: update toml

* chore: remove unused type alias

* feat: check state after writing manifest

* chore: address CR comments

* chore: change status code

* chore: Update src/mito2/src/region.rs

Co-authored-by: Lei, HUANG <[email protected]>

* fix: execute applier

---------

Co-authored-by: Lei, HUANG <[email protected]>
  • Loading branch information
evenyag and v0y4g3r authored Apr 24, 2024
1 parent 86a9895 commit 4685b59
Show file tree
Hide file tree
Showing 28 changed files with 814 additions and 363 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ bytemuck = "1.12"
bytes = { version = "1.5", features = ["serde"] }
chrono = { version = "0.4", features = ["serde"] }
clap = { version = "4.4", features = ["derive"] }
crossbeam-utils = "0.8"
dashmap = "5.4"
datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "34eda15b73a9e278af8844b30ed2f1c21c10359c" }
datafusion-common = { git = "https://github.com/apache/arrow-datafusion.git", rev = "34eda15b73a9e278af8844b30ed2f1c21c10359c" }
Expand Down
1 change: 1 addition & 0 deletions src/common/error/src/status_code.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ pub enum StatusCode {
RegionNotFound = 4005,
RegionAlreadyExists = 4006,
RegionReadonly = 4007,
/// Region is not in a proper state to handle specific request.
RegionNotReady = 4008,
// If mutually exclusive operations are reached at the same time,
// only one can be executed, another one will get region busy.
Expand Down
1 change: 1 addition & 0 deletions src/mito2/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ common-test-util = { workspace = true, optional = true }
common-time.workspace = true
common-wal.workspace = true
crc32fast = "1"
crossbeam-utils.workspace = true
datafusion.workspace = true
datafusion-common.workspace = true
datafusion-expr.workspace = true
Expand Down
46 changes: 36 additions & 10 deletions src/mito2/src/compaction.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,11 @@ use crate::error::{
use crate::metrics::COMPACTION_STAGE_ELAPSED;
use crate::region::options::CompactionOptions;
use crate::region::version::{VersionControlRef, VersionRef};
use crate::region::ManifestContextRef;
use crate::request::{OptionOutputTx, OutputTx, WorkerRequest};
use crate::schedule::scheduler::SchedulerRef;
use crate::sst::file_purger::FilePurgerRef;
use crate::worker::WorkerListener;

/// Region compaction request.
pub struct CompactionRequest {
Expand All @@ -54,6 +56,9 @@ pub struct CompactionRequest {
/// Start time of compaction task.
pub(crate) start_time: Instant,
pub(crate) cache_manager: CacheManagerRef,
pub(crate) manifest_ctx: ManifestContextRef,
pub(crate) version_control: VersionControlRef,
pub(crate) listener: WorkerListener,
}

impl CompactionRequest {
Expand Down Expand Up @@ -88,19 +93,25 @@ pub(crate) struct CompactionScheduler {
/// Request sender of the worker that this scheduler belongs to.
request_sender: Sender<WorkerRequest>,
cache_manager: CacheManagerRef,
engine_config: Arc<MitoConfig>,
listener: WorkerListener,
}

impl CompactionScheduler {
pub(crate) fn new(
scheduler: SchedulerRef,
request_sender: Sender<WorkerRequest>,
cache_manager: CacheManagerRef,
engine_config: Arc<MitoConfig>,
listener: WorkerListener,
) -> Self {
Self {
scheduler,
region_status: HashMap::new(),
request_sender,
cache_manager,
engine_config,
listener,
}
}

Expand All @@ -112,7 +123,7 @@ impl CompactionScheduler {
access_layer: &AccessLayerRef,
file_purger: &FilePurgerRef,
waiter: OptionOutputTx,
engine_config: Arc<MitoConfig>,
manifest_ctx: &ManifestContextRef,
) -> Result<()> {
if let Some(status) = self.region_status.get_mut(&region_id) {
// Region is compacting. Add the waiter to pending list.
Expand All @@ -130,8 +141,10 @@ impl CompactionScheduler {
let request = status.new_compaction_request(
self.request_sender.clone(),
waiter,
engine_config,
self.engine_config.clone(),
self.cache_manager.clone(),
manifest_ctx,
self.listener.clone(),
);
self.region_status.insert(region_id, status);
self.schedule_compaction_request(request)
Expand All @@ -141,7 +154,7 @@ impl CompactionScheduler {
pub(crate) fn on_compaction_finished(
&mut self,
region_id: RegionId,
engine_config: Arc<MitoConfig>,
manifest_ctx: &ManifestContextRef,
) {
let Some(status) = self.region_status.get_mut(&region_id) else {
return;
Expand All @@ -150,8 +163,10 @@ impl CompactionScheduler {
let request = status.new_compaction_request(
self.request_sender.clone(),
OptionOutputTx::none(),
engine_config,
self.engine_config.clone(),
self.cache_manager.clone(),
manifest_ctx,
self.listener.clone(),
);
// Try to schedule next compaction task for this region.
if let Err(e) = self.schedule_compaction_request(request) {
Expand Down Expand Up @@ -325,6 +340,8 @@ impl CompactionStatus {
waiter: OptionOutputTx,
engine_config: Arc<MitoConfig>,
cache_manager: CacheManagerRef,
manifest_ctx: &ManifestContextRef,
listener: WorkerListener,
) -> CompactionRequest {
let current_version = self.version_control.current().version;
let start_time = Instant::now();
Expand All @@ -337,6 +354,9 @@ impl CompactionStatus {
file_purger: self.file_purger.clone(),
start_time,
cache_manager,
manifest_ctx: manifest_ctx.clone(),
version_control: self.version_control.clone(),
listener,
};

if let Some(pending) = self.pending_compaction.take() {
Expand Down Expand Up @@ -371,14 +391,17 @@ mod tests {
let version_control = Arc::new(builder.build());
let (output_tx, output_rx) = oneshot::channel();
let waiter = OptionOutputTx::from(output_tx);
let manifest_ctx = env
.mock_manifest_context(version_control.current().version.metadata.clone())
.await;
scheduler
.schedule_compaction(
builder.region_id(),
&version_control,
&env.access_layer,
&purger,
waiter,
Arc::new(MitoConfig::default()),
&manifest_ctx,
)
.unwrap();
let output = output_rx.await.unwrap().unwrap();
Expand All @@ -396,7 +419,7 @@ mod tests {
&env.access_layer,
&purger,
waiter,
Arc::new(MitoConfig::default()),
&manifest_ctx,
)
.unwrap();
let output = output_rx.await.unwrap().unwrap();
Expand Down Expand Up @@ -448,14 +471,17 @@ mod tests {
.push_l0_file(90, end)
.build(),
);
let manifest_ctx = env
.mock_manifest_context(version_control.current().version.metadata.clone())
.await;
scheduler
.schedule_compaction(
region_id,
&version_control,
&env.access_layer,
&purger,
OptionOutputTx::none(),
Arc::new(MitoConfig::default()),
&manifest_ctx,
)
.unwrap();
// Should schedule 1 compaction.
Expand Down Expand Up @@ -483,7 +509,7 @@ mod tests {
&env.access_layer,
&purger,
OptionOutputTx::none(),
Arc::new(MitoConfig::default()),
&manifest_ctx,
)
.unwrap();
assert_eq!(1, scheduler.region_status.len());
Expand All @@ -496,7 +522,7 @@ mod tests {
.is_some());

// On compaction finished and schedule next compaction.
scheduler.on_compaction_finished(region_id, Arc::new(MitoConfig::default()));
scheduler.on_compaction_finished(region_id, &manifest_ctx);
assert_eq!(1, scheduler.region_status.len());
assert_eq!(2, job_scheduler.num_jobs());
// 5 files for next compaction.
Expand All @@ -514,7 +540,7 @@ mod tests {
&env.access_layer,
&purger,
OptionOutputTx::none(),
Arc::new(MitoConfig::default()),
&manifest_ctx,
)
.unwrap();
assert_eq!(2, job_scheduler.num_jobs());
Expand Down
95 changes: 66 additions & 29 deletions src/mito2/src/compaction/twcs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,19 +34,23 @@ use crate::compaction::picker::{CompactionTask, Picker};
use crate::compaction::CompactionRequest;
use crate::config::MitoConfig;
use crate::error::{self, CompactRegionSnafu};
use crate::manifest::action::{RegionEdit, RegionMetaAction, RegionMetaActionList};
use crate::metrics::{COMPACTION_FAILURE_COUNT, COMPACTION_STAGE_ELAPSED};
use crate::read::projection::ProjectionMapper;
use crate::read::scan_region::ScanInput;
use crate::read::seq_scan::SeqScan;
use crate::read::{BoxedBatchReader, Source};
use crate::region::options::IndexOptions;
use crate::region::version::VersionControlRef;
use crate::region::{ManifestContextRef, RegionState};
use crate::request::{
BackgroundNotify, CompactionFailed, CompactionFinished, OutputTx, WorkerRequest,
};
use crate::sst::file::{FileHandle, FileId, FileMeta, IndexType, Level};
use crate::sst::file_purger::FilePurgerRef;
use crate::sst::parquet::WriteOptions;
use crate::sst::version::LevelMeta;
use crate::worker::WorkerListener;

const MAX_PARALLEL_COMPACTION: usize = 8;

Expand Down Expand Up @@ -140,6 +144,9 @@ impl Picker for TwcsPicker {
file_purger,
start_time,
cache_manager,
manifest_ctx,
version_control,
listener,
} = req;

let region_metadata = current_version.metadata.clone();
Expand Down Expand Up @@ -197,6 +204,9 @@ impl Picker for TwcsPicker {
storage: current_version.options.storage.clone(),
index_options: current_version.options.index_options.clone(),
append_mode: current_version.options.append_mode,
manifest_ctx,
version_control,
listener,
};
Some(Box::new(task))
}
Expand Down Expand Up @@ -341,6 +351,12 @@ pub(crate) struct TwcsCompactionTask {
pub(crate) index_options: IndexOptions,
/// The region is using append mode.
pub(crate) append_mode: bool,
/// Manifest context.
pub(crate) manifest_ctx: ManifestContextRef,
/// Version control to update.
pub(crate) version_control: VersionControlRef,
/// Event listener.
pub(crate) listener: WorkerListener,
}

impl Debug for TwcsCompactionTask {
Expand Down Expand Up @@ -481,18 +497,55 @@ impl TwcsCompactionTask {
Ok((output_files, inputs))
}

async fn handle_compaction(&mut self) -> error::Result<(Vec<FileMeta>, Vec<FileMeta>)> {
async fn handle_compaction(&mut self) -> error::Result<()> {
self.mark_files_compacting(true);
let merge_timer = COMPACTION_STAGE_ELAPSED
.with_label_values(&["merge"])
.start_timer();
let (output, mut compacted) = self.merge_ssts().await.map_err(|e| {
error!(e; "Failed to compact region: {}", self.region_id);
merge_timer.stop_and_discard();
e
})?;
compacted.extend(self.expired_ssts.iter().map(FileHandle::meta));
Ok((output, compacted))
let (added, mut deleted) = match self.merge_ssts().await {
Ok(v) => v,
Err(e) => {
error!(e; "Failed to compact region: {}", self.region_id);
merge_timer.stop_and_discard();
return Err(e);
}
};
deleted.extend(self.expired_ssts.iter().map(FileHandle::meta));
let merge_time = merge_timer.stop_and_record();
info!(
"Compacted SST files, region_id: {}, input: {:?}, output: {:?}, window: {:?}, waiter_num: {}, merge_time: {}s",
self.region_id,
deleted,
added,
self.compaction_time_window,
self.waiters.len(),
merge_time,
);

self.listener.on_merge_ssts_finished(self.region_id).await;

let _manifest_timer = COMPACTION_STAGE_ELAPSED
.with_label_values(&["write_manifest"])
.start_timer();
// Write region edit to manifest.
let edit = RegionEdit {
files_to_add: added,
files_to_remove: deleted,
compaction_time_window: self
.compaction_time_window
.map(|seconds| Duration::from_secs(seconds as u64)),
flushed_entry_id: None,
flushed_sequence: None,
};
let action_list = RegionMetaActionList::with_action(RegionMetaAction::Edit(edit.clone()));
// We might leak files if we fail to update manifest. We can add a cleanup task to
// remove them later.
self.manifest_ctx
.update_manifest(RegionState::Writable, action_list, || {
self.version_control
.apply_edit(edit, &[], self.file_purger.clone());
})
.await
}

/// Handles compaction failure, notifies all waiters.
Expand Down Expand Up @@ -520,27 +573,11 @@ impl TwcsCompactionTask {
impl CompactionTask for TwcsCompactionTask {
async fn run(&mut self) {
let notify = match self.handle_compaction().await {
Ok((added, deleted)) => {
info!(
"Compacted SST files, input: {:?}, output: {:?}, window: {:?}, waiter_num: {}",
deleted,
added,
self.compaction_time_window,
self.waiters.len(),
);

BackgroundNotify::CompactionFinished(CompactionFinished {
region_id: self.region_id,
compaction_outputs: added,
compacted_files: deleted,
senders: std::mem::take(&mut self.waiters),
file_purger: self.file_purger.clone(),
compaction_time_window: self
.compaction_time_window
.map(|seconds| Duration::from_secs(seconds as u64)),
start_time: self.start_time,
})
}
Ok(()) => BackgroundNotify::CompactionFinished(CompactionFinished {
region_id: self.region_id,
senders: std::mem::take(&mut self.waiters),
start_time: self.start_time,
}),
Err(e) => {
error!(e; "Failed to compact region, region id: {}", self.region_id);
let err = Arc::new(e);
Expand Down
4 changes: 2 additions & 2 deletions src/mito2/src/engine/catchup_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -345,7 +345,7 @@ async fn test_catchup_with_manifest_update() {
// Ensures the mutable is empty.
assert!(region.version().memtables.mutable.is_empty());

let manifest = region.manifest_manager.read().await.manifest();
let manifest = region.manifest_ctx.manifest().await;
assert_eq!(manifest.manifest_version, 0);

let resp = follower_engine
Expand All @@ -361,7 +361,7 @@ async fn test_catchup_with_manifest_update() {

// The inner region was replaced. We must get it again.
let region = follower_engine.get_region(region_id).unwrap();
let manifest = region.manifest_manager.read().await.manifest();
let manifest = region.manifest_ctx.manifest().await;
assert_eq!(manifest.manifest_version, 2);
assert!(!region.is_writable());

Expand Down
Loading

0 comments on commit 4685b59

Please sign in to comment.