Skip to content

Commit

Permalink
feat: Implements merge_mode region options (GreptimeTeam#4208)
Browse files Browse the repository at this point in the history
* feat: add update_mode to region options

* test: add test

* feat: last not null iter

* feat: time series last not null

* feat: partition tree update mode

* feat: partition tree

* fix: last not null iter slice

* test: add test for compaction

* test: use second resolution

* style: fix clippy

* chore: merge two lines

Co-authored-by: Jeremyhi <[email protected]>

* chore: address CR comments

* refactor: UpdateMode -> MergeMode

* refactor: LastNotNull -> LastNonNull

* chore: return None earlier

* feat: validate region options

make merge mode optional and use default while it is None

* test: fix tests

---------

Co-authored-by: Jeremyhi <[email protected]>
  • Loading branch information
evenyag and fengjiachun authored Jun 27, 2024
1 parent 8702066 commit 10b7a3d
Show file tree
Hide file tree
Showing 18 changed files with 671 additions and 90 deletions.
7 changes: 4 additions & 3 deletions src/mito2/benches/memtable_bench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ use datatypes::schema::ColumnSchema;
use mito2::memtable::partition_tree::{PartitionTreeConfig, PartitionTreeMemtable};
use mito2::memtable::time_series::TimeSeriesMemtable;
use mito2::memtable::{KeyValues, Memtable};
use mito2::region::options::MergeMode;
use mito2::test_util::memtable_util::{self, region_metadata_to_row_schema};
use rand::rngs::ThreadRng;
use rand::seq::SliceRandom;
Expand Down Expand Up @@ -51,7 +52,7 @@ fn write_rows(c: &mut Criterion) {
});
});
group.bench_function("time_series", |b| {
let memtable = TimeSeriesMemtable::new(metadata.clone(), 1, None, true);
let memtable = TimeSeriesMemtable::new(metadata.clone(), 1, None, true, MergeMode::LastRow);
let kvs =
memtable_util::build_key_values(&metadata, "hello".to_string(), 42, &timestamps, 1);
b.iter(|| {
Expand Down Expand Up @@ -83,7 +84,7 @@ fn full_scan(c: &mut Criterion) {
});
});
group.bench_function("time_series", |b| {
let memtable = TimeSeriesMemtable::new(metadata.clone(), 1, None, true);
let memtable = TimeSeriesMemtable::new(metadata.clone(), 1, None, true, MergeMode::LastRow);
for kvs in generator.iter() {
memtable.write(&kvs).unwrap();
}
Expand Down Expand Up @@ -121,7 +122,7 @@ fn filter_1_host(c: &mut Criterion) {
});
});
group.bench_function("time_series", |b| {
let memtable = TimeSeriesMemtable::new(metadata.clone(), 1, None, true);
let memtable = TimeSeriesMemtable::new(metadata.clone(), 1, None, true, MergeMode::LastRow);
for kvs in generator.iter() {
memtable.write(&kvs).unwrap();
}
Expand Down
45 changes: 27 additions & 18 deletions src/mito2/src/compaction.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ use crate::read::projection::ProjectionMapper;
use crate::read::scan_region::ScanInput;
use crate::read::seq_scan::SeqScan;
use crate::read::BoxedBatchReader;
use crate::region::options::MergeMode;
use crate::region::version::{VersionControlRef, VersionRef};
use crate::region::ManifestContextRef;
use crate::request::{OptionOutputTx, OutputTx, WorkerRequest};
Expand Down Expand Up @@ -453,31 +454,39 @@ pub struct SerializedCompactionOutput {
output_time_range: Option<TimestampRange>,
}

/// Builds [BoxedBatchReader] that reads all SST files and yields batches in primary key order.
async fn build_sst_reader(
/// Builder to create [BoxedBatchReader] for compaction.
struct CompactionSstReaderBuilder<'a> {
metadata: RegionMetadataRef,
sst_layer: AccessLayerRef,
cache: Option<CacheManagerRef>,
inputs: &[FileHandle],
inputs: &'a [FileHandle],
append_mode: bool,
filter_deleted: bool,
time_range: Option<TimestampRange>,
) -> Result<BoxedBatchReader> {
let mut scan_input = ScanInput::new(sst_layer, ProjectionMapper::all(&metadata)?)
.with_files(inputs.to_vec())
.with_append_mode(append_mode)
.with_cache(cache)
.with_filter_deleted(filter_deleted)
// We ignore file not found error during compaction.
.with_ignore_file_not_found(true);

// This serves as a workaround of https://github.com/GreptimeTeam/greptimedb/issues/3944
// by converting time ranges into predicate.
if let Some(time_range) = time_range {
scan_input = scan_input.with_predicate(time_range_to_predicate(time_range, &metadata)?);
}
merge_mode: MergeMode,
}

SeqScan::new(scan_input).build_reader().await
impl<'a> CompactionSstReaderBuilder<'a> {
/// Builds [BoxedBatchReader] that reads all SST files and yields batches in primary key order.
async fn build_sst_reader(self) -> Result<BoxedBatchReader> {
let mut scan_input = ScanInput::new(self.sst_layer, ProjectionMapper::all(&self.metadata)?)
.with_files(self.inputs.to_vec())
.with_append_mode(self.append_mode)
.with_cache(self.cache)
.with_filter_deleted(self.filter_deleted)
// We ignore file not found error during compaction.
.with_ignore_file_not_found(true)
.with_merge_mode(self.merge_mode);

// This serves as a workaround of https://github.com/GreptimeTeam/greptimedb/issues/3944
// by converting time ranges into predicate.
if let Some(time_range) = self.time_range {
scan_input =
scan_input.with_predicate(time_range_to_predicate(time_range, &self.metadata)?);
}

SeqScan::new(scan_input).build_reader().await
}
}

/// Converts time range to predicates so that rows outside the range will be filtered.
Expand Down
24 changes: 14 additions & 10 deletions src/mito2/src/compaction/compactor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ use store_api::storage::RegionId;

use crate::access_layer::{AccessLayer, AccessLayerRef, SstWriteRequest};
use crate::cache::{CacheManager, CacheManagerRef};
use crate::compaction::build_sst_reader;
use crate::compaction::picker::{new_picker, PickerOutput};
use crate::compaction::CompactionSstReaderBuilder;
use crate::config::MitoConfig;
use crate::error::{EmptyRegionDirSnafu, JoinSnafu, ObjectStoreNotFoundSnafu, Result};
use crate::manifest::action::{RegionEdit, RegionMetaAction, RegionMetaActionList};
Expand Down Expand Up @@ -137,7 +137,8 @@ pub async fn open_compaction_region(
let memtable_builder = MemtableBuilderProvider::new(None, Arc::new(mito_config.clone()))
.builder_for_options(
req.region_options.memtable.as_ref(),
!req.region_options.append_mode,
req.region_options.need_dedup(),
req.region_options.merge_mode(),
);

// Initial memtable id is 0.
Expand Down Expand Up @@ -282,16 +283,19 @@ impl Compactor for DefaultCompactor {
.index_options
.clone();
let append_mode = compaction_region.current_version.options.append_mode;
let merge_mode = compaction_region.current_version.options.merge_mode();
futs.push(async move {
let reader = build_sst_reader(
region_metadata.clone(),
sst_layer.clone(),
Some(cache_manager.clone()),
&output.inputs,
let reader = CompactionSstReaderBuilder {
metadata: region_metadata.clone(),
sst_layer: sst_layer.clone(),
cache: Some(cache_manager.clone()),
inputs: &output.inputs,
append_mode,
output.filter_deleted,
output.output_time_range,
)
filter_deleted: output.filter_deleted,
time_range: output.output_time_range,
merge_mode,
}
.build_sst_reader()
.await?;
let file_meta_opt = sst_layer
.write_sst(
Expand Down
1 change: 1 addition & 0 deletions src/mito2/src/compaction/window.rs
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,7 @@ mod tests {
wal_options: Default::default(),
index_options: Default::default(),
memtable: None,
merge_mode: None,
},
})
}
Expand Down
2 changes: 2 additions & 0 deletions src/mito2/src/engine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ mod flush_test;
#[cfg(any(test, feature = "test"))]
pub mod listener;
#[cfg(test)]
mod merge_mode_test;
#[cfg(test)]
mod open_test;
#[cfg(test)]
mod parallel_test;
Expand Down
2 changes: 1 addition & 1 deletion src/mito2/src/engine/append_mode_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ async fn test_append_mode_compaction() {
.await
.unwrap();

// Flush 2 SSTs for compaction.
// Flush 3 SSTs for compaction.
// a, field 1, 2
let rows = Rows {
schema: column_schemas.clone(),
Expand Down
208 changes: 208 additions & 0 deletions src/mito2/src/engine/merge_mode_test.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,208 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Tests for merge mode.
use api::v1::Rows;
use common_recordbatch::RecordBatches;
use store_api::region_engine::RegionEngine;
use store_api::region_request::{RegionCompactRequest, RegionRequest};
use store_api::storage::{RegionId, ScanRequest};

use crate::config::MitoConfig;
use crate::test_util::batch_util::sort_batches_and_print;
use crate::test_util::{
build_delete_rows_for_key, build_rows_with_fields, delete_rows, delete_rows_schema,
flush_region, put_rows, reopen_region, rows_schema, CreateRequestBuilder, TestEnv,
};

#[tokio::test]
async fn test_merge_mode_write_query() {
    // Verifies that a region created with `merge_mode = last_non_null` fills
    // null field values from earlier writes when rows with the same tag and
    // timestamp are merged at query time.
    common_telemetry::init_default_ut_logging();

    let mut env = TestEnv::new();
    let engine = env.create_engine(MitoConfig::default()).await;

    let region_id = RegionId::new(1, 1);
    // Two field columns so merging can be observed per column, not per row.
    let request = CreateRequestBuilder::new()
        .field_num(2)
        .insert_option("merge_mode", "last_non_null")
        .build();

    let column_schemas = rows_schema(&request);
    engine
        .handle_request(region_id, RegionRequest::Create(request))
        .await
        .unwrap();

    // First write: key "a", ts 1..=3; each tuple is (field_0, field_1) and
    // `None` leaves that column null.
    let rows = build_rows_with_fields(
        "a",
        &[1, 2, 3],
        &[(Some(1), None), (None, None), (None, Some(3))],
    );
    let rows = Rows {
        schema: column_schemas.clone(),
        rows,
    };
    put_rows(&engine, region_id, rows).await;

    // Second write: overwrites field_0 at ts 2 and 3 (field_1 left null).
    let rows = build_rows_with_fields("a", &[2, 3], &[(Some(12), None), (Some(13), None)]);
    let rows = Rows {
        schema: column_schemas.clone(),
        rows,
    };
    put_rows(&engine, region_id, rows).await;

    // Third write: overwrites field_0 at ts 1 and both fields at ts 2.
    let rows = build_rows_with_fields("a", &[1, 2], &[(Some(11), None), (Some(22), Some(222))]);
    let rows = Rows {
        schema: column_schemas,
        rows,
    };
    put_rows(&engine, region_id, rows).await;

    // Under last_non_null each null column falls back to the most recent
    // non-null value for the same (tag, ts) — e.g. ts 3 keeps field_1 = 3.0
    // from the first write while taking field_0 = 13.0 from the second.
    let request = ScanRequest::default();
    let stream = engine.scan_to_stream(region_id, request).await.unwrap();
    let batches = RecordBatches::try_collect(stream).await.unwrap();
    // NOTE(review): column padding inside this literal appears collapsed by
    // the diff view this was taken from — confirm the exact spacing against
    // real `pretty_print` output.
    let expected = "\
+-------+---------+---------+---------------------+
| tag_0 | field_0 | field_1 | ts |
+-------+---------+---------+---------------------+
| a | 11.0 | | 1970-01-01T00:00:01 |
| a | 22.0 | 222.0 | 1970-01-01T00:00:02 |
| a | 13.0 | 3.0 | 1970-01-01T00:00:03 |
+-------+---------+---------+---------------------+";
    assert_eq!(expected, batches.pretty_print().unwrap());
}

#[tokio::test]
async fn test_merge_mode_compaction() {
    // Verifies `merge_mode = last_non_null` across flush + compaction + reopen:
    // the compacted output must fill nulls from older SSTs, honor the delete,
    // and survive a region reopen with the same options.
    common_telemetry::init_default_ut_logging();

    let mut env = TestEnv::new();
    let engine = env
        .create_engine(MitoConfig {
            scan_parallelism: 2,
            ..Default::default()
        })
        .await;
    let region_id = RegionId::new(1, 1);

    // Window file limits are capped at 2 so the 3 flushed SSTs below make the
    // region eligible for TWCS compaction.
    let request = CreateRequestBuilder::new()
        .field_num(2)
        .insert_option("compaction.type", "twcs")
        .insert_option("compaction.twcs.max_active_window_files", "2")
        .insert_option("compaction.twcs.max_inactive_window_files", "2")
        .insert_option("merge_mode", "last_non_null")
        .build();
    // Saved so the region can be reopened later with identical options.
    let region_dir = request.region_dir.clone();
    let region_opts = request.options.clone();
    let delete_schema = delete_rows_schema(&request);
    let column_schemas = rows_schema(&request);
    engine
        .handle_request(region_id, RegionRequest::Create(request))
        .await
        .unwrap();

    // Flush 3 SSTs for compaction.
    // SST 1 — a, 1 => (1, null), 2 => (null, null), 3 => (null, 3), 4 => (4, 4)
    let rows = build_rows_with_fields(
        "a",
        &[1, 2, 3, 4],
        &[
            (Some(1), None),
            (None, None),
            (None, Some(3)),
            (Some(4), Some(4)),
        ],
    );
    let rows = Rows {
        schema: column_schemas.clone(),
        rows,
    };
    put_rows(&engine, region_id, rows).await;
    flush_region(&engine, region_id, None).await;

    // SST 2 — a, 1 => (null, 11), 2 => (2, null), 3 => (null, 13)
    let rows = build_rows_with_fields(
        "a",
        &[1, 2, 3],
        &[(None, Some(11)), (Some(2), None), (None, Some(13))],
    );
    let rows = Rows {
        schema: column_schemas.clone(),
        rows,
    };
    put_rows(&engine, region_id, rows).await;
    flush_region(&engine, region_id, None).await;

    // SST 3 — delete a, 4: the delete must be applied during compaction so
    // the (a, 4) row never reappears in merged output.
    let rows = Rows {
        schema: delete_schema.clone(),
        rows: build_delete_rows_for_key("a", 4, 5),
    };
    delete_rows(&engine, region_id, rows).await;
    flush_region(&engine, region_id, None).await;

    let output = engine
        .handle_request(
            region_id,
            RegionRequest::Compact(RegionCompactRequest::default()),
        )
        .await
        .unwrap();
    // Compaction reports zero affected rows.
    assert_eq!(output.affected_rows, 0);

    // Post-compaction write kept in the memtable:
    // a, 1 => (21, null), 2 => (22, null)
    let rows = build_rows_with_fields("a", &[1, 2], &[(Some(21), None), (Some(22), None)]);
    let rows = Rows {
        schema: column_schemas.clone(),
        rows,
    };
    put_rows(&engine, region_id, rows).await;

    // NOTE(review): column padding inside this literal appears collapsed by
    // the diff view this was taken from — confirm the exact spacing against
    // real `pretty_print` output.
    let expected = "\
+-------+---------+---------+---------------------+
| tag_0 | field_0 | field_1 | ts |
+-------+---------+---------+---------------------+
| a | 21.0 | 11.0 | 1970-01-01T00:00:01 |
| a | 22.0 | | 1970-01-01T00:00:02 |
| a | | 13.0 | 1970-01-01T00:00:03 |
+-------+---------+---------+---------------------+";
    // Scans in parallel.
    let scanner = engine.scanner(region_id, ScanRequest::default()).unwrap();
    // Expect one compacted SST plus the single memtable holding the last write.
    assert_eq!(1, scanner.num_files());
    assert_eq!(1, scanner.num_memtables());
    let stream = scanner.scan().await.unwrap();
    let batches = RecordBatches::try_collect(stream).await.unwrap();
    assert_eq!(expected, sort_batches_and_print(&batches, &["tag_0", "ts"]));

    // Reopens engine with parallelism 1.
    let engine = env
        .reopen_engine(
            engine,
            MitoConfig {
                scan_parallelism: 1,
                ..Default::default()
            },
        )
        .await;
    // Reopens the region; the scan after replaying from storage must match
    // the pre-reopen result.
    reopen_region(&engine, region_id, region_dir, false, region_opts).await;
    let stream = engine
        .scan_to_stream(region_id, ScanRequest::default())
        .await
        .unwrap();
    let batches = RecordBatches::try_collect(stream).await.unwrap();
    assert_eq!(expected, sort_batches_and_print(&batches, &["tag_0", "ts"]));
}
Loading

0 comments on commit 10b7a3d

Please sign in to comment.