From abbac46c05ea0cd5742aac6589fc7ca20eb58446 Mon Sep 17 00:00:00 2001 From: Yingwen Date: Wed, 29 Nov 2023 13:49:40 +0800 Subject: [PATCH] fix: do not expose manifest compression algorithm (#2835) * fix: don't allow to set manifest compression algorithm * docs: update config examples --- config/datanode.example.toml | 4 ++-- config/standalone.example.toml | 4 ++-- src/mito2/src/config.rs | 7 +++---- src/mito2/src/manifest/storage.rs | 1 + src/mito2/src/region/opener.rs | 5 ++++- tests-integration/tests/http.rs | 2 +- 6 files changed, 13 insertions(+), 10 deletions(-) diff --git a/config/datanode.example.toml b/config/datanode.example.toml index 256970863184..2b4b7b4b1b9a 100644 --- a/config/datanode.example.toml +++ b/config/datanode.example.toml @@ -64,8 +64,8 @@ worker_channel_size = 128 worker_request_batch_size = 64 # Number of meta action updated to trigger a new checkpoint for the manifest manifest_checkpoint_distance = 10 -# Manifest compression type -manifest_compress_type = "uncompressed" +# Whether to compress manifest and checkpoint file by gzip (default false). +compress_manifest = false # Max number of running background jobs max_background_jobs = 4 # Interval to auto flush a region if it has not flushed yet. diff --git a/config/standalone.example.toml b/config/standalone.example.toml index 9055b7eca088..bb8f5c2d1cbb 100644 --- a/config/standalone.example.toml +++ b/config/standalone.example.toml @@ -133,8 +133,8 @@ worker_channel_size = 128 worker_request_batch_size = 64 # Number of meta action updated to trigger a new checkpoint for the manifest manifest_checkpoint_distance = 10 -# Manifest compression type -manifest_compress_type = "uncompressed" +# Whether to compress manifest and checkpoint file by gzip (default false). +compress_manifest = false # Max number of running background jobs max_background_jobs = 4 # Interval to auto flush a region if it has not flushed yet. diff --git a/src/mito2/src/config.rs b/src/mito2/src/config.rs index e6061ffa7a2e..edb7d3dd0d65 100644 --- a/src/mito2/src/config.rs +++ b/src/mito2/src/config.rs @@ -17,7 +17,6 @@ use std::time::Duration; use common_base::readable_size::ReadableSize; -use common_datasource::compression::CompressionType; use common_telemetry::warn; use serde::{Deserialize, Serialize}; @@ -42,8 +41,8 @@ pub struct MitoConfig { /// Number of meta action updated to trigger a new checkpoint /// for the manifest (default 10). pub manifest_checkpoint_distance: u64, - /// Manifest compression type (default uncompressed). - pub manifest_compress_type: CompressionType, + /// Whether to compress manifest and checkpoint file by gzip (default false). + pub compress_manifest: bool, // Background job configs: /// Max number of running background jobs (default 4). @@ -78,7 +77,7 @@ impl Default for MitoConfig { worker_channel_size: 128, worker_request_batch_size: 64, manifest_checkpoint_distance: 10, - manifest_compress_type: CompressionType::Uncompressed, + compress_manifest: false, max_background_jobs: DEFAULT_MAX_BG_JOB, auto_flush_interval: Duration::from_secs(30 * 60), global_write_buffer_size: ReadableSize::gb(1), diff --git a/src/mito2/src/manifest/storage.rs b/src/mito2/src/manifest/storage.rs index edd63ac52162..3e8860155b6c 100644 --- a/src/mito2/src/manifest/storage.rs +++ b/src/mito2/src/manifest/storage.rs @@ -42,6 +42,7 @@ const DEFAULT_MANIFEST_COMPRESSION_TYPE: CompressionType = CompressionType::Gzip /// So when we encounter problems, we need to fall back to `FALL_BACK_COMPRESS_TYPE` for processing. const FALL_BACK_COMPRESS_TYPE: CompressionType = CompressionType::Uncompressed; +/// Returns the [CompressionType] according to whether to compress manifest files. #[inline] pub const fn manifest_compress_type(compress: bool) -> CompressionType { if compress { diff --git a/src/mito2/src/region/opener.rs b/src/mito2/src/region/opener.rs index 0baa8dac50b1..4c3a1ad577bd 100644 --- a/src/mito2/src/region/opener.rs +++ b/src/mito2/src/region/opener.rs @@ -33,6 +33,7 @@ use crate::cache::CacheManagerRef; use crate::config::MitoConfig; use crate::error::{EmptyRegionDirSnafu, ObjectStoreNotFoundSnafu, RegionCorruptedSnafu, Result}; use crate::manifest::manager::{RegionManifestManager, RegionManifestOptions}; +use crate::manifest::storage::manifest_compress_type; use crate::memtable::MemtableBuilderRef; use crate::region::options::RegionOptions; use crate::region::version::{VersionBuilder, VersionControl, VersionControlRef}; @@ -259,7 +260,9 @@ impl RegionOpener { Ok(RegionManifestOptions { manifest_dir: new_manifest_dir(&self.region_dir), object_store, - compress_type: config.manifest_compress_type, + // We don't allow users to set the compression algorithm as we use it as a file suffix. + // Currently, the manifest storage doesn't have good support for changing compression algorithms. + compress_type: manifest_compress_type(config.compress_manifest), checkpoint_distance: config.manifest_checkpoint_distance, }) } diff --git a/tests-integration/tests/http.rs b/tests-integration/tests/http.rs index b6caafdad641..68b0f01d1412 100644 --- a/tests-integration/tests/http.rs +++ b/tests-integration/tests/http.rs @@ -720,7 +720,7 @@ num_workers = {} worker_channel_size = 128 worker_request_batch_size = 64 manifest_checkpoint_distance = 10 -manifest_compress_type = "uncompressed" +compress_manifest = false max_background_jobs = 4 auto_flush_interval = "30m" global_write_buffer_size = "1GiB"