From d89af20f2eb61b772dc3108a88c2c50a1799d7f0 Mon Sep 17 00:00:00 2001 From: Dennis Zhuang Date: Fri, 6 Dec 2024 10:03:06 +0800 Subject: [PATCH 1/3] feat: adjust WAL purge default configurations --- config/config.md | 14 +++++++------- config/datanode.example.toml | 9 ++++----- config/metasrv.example.toml | 6 +++--- config/standalone.example.toml | 10 +++++----- src/common/wal/src/config/raft_engine.rs | 4 ++-- tests-integration/tests/http.rs | 4 ++-- 6 files changed, 23 insertions(+), 24 deletions(-) diff --git a/config/config.md b/config/config.md index ec00eb98b730..f366ddb02aea 100644 --- a/config/config.md +++ b/config/config.md @@ -13,7 +13,6 @@ | Key | Type | Default | Descriptions | | --- | -----| ------- | ----------- | | `mode` | String | `standalone` | The running mode of the datanode. It can be `standalone` or `distributed`. | -| `enable_telemetry` | Bool | `true` | Enable telemetry to collect anonymous usage data. | | `default_timezone` | String | Unset | The default timezone of the server. | | `init_regions_in_background` | Bool | `false` | Initialize all regions in the background during the startup.
By default, it provides services after all regions have been initialized. | | `init_regions_parallelism` | Integer | `16` | Parallelism of initializing regions. | @@ -21,6 +20,7 @@ | `runtime` | -- | -- | The runtime options. | | `runtime.global_rt_size` | Integer | `8` | The number of threads to execute the runtime for global read operations. | | `runtime.compact_rt_size` | Integer | `4` | The number of threads to execute the runtime for global write operations. | +| `runtime.enable_telemetry` | Bool | `true` | Enable telemetry to collect anonymous usage data. Enabled by default. | | `http` | -- | -- | The HTTP server options. | | `http.addr` | String | `127.0.0.1:4000` | The address to bind the HTTP server. | | `http.timeout` | String | `30s` | HTTP request timeout. Set to 0 to disable timeout. | @@ -62,8 +62,8 @@ | `wal.provider` | String | `raft_engine` | The provider of the WAL.
- `raft_engine`: the wal is stored in the local file system by raft-engine.
- `kafka`: it's remote wal that data is stored in Kafka. | | `wal.dir` | String | Unset | The directory to store the WAL files.
**It's only used when the provider is `raft_engine`**. | | `wal.file_size` | String | `256MB` | The size of the WAL segment file.
**It's only used when the provider is `raft_engine`**. | -| `wal.purge_threshold` | String | `4GB` | The threshold of the WAL size to trigger a flush.
**It's only used when the provider is `raft_engine`**. | -| `wal.purge_interval` | String | `10m` | The interval to trigger a flush.
**It's only used when the provider is `raft_engine`**. | +| `wal.purge_threshold` | String | `1GB` | The threshold of the WAL size to trigger a flush.
**It's only used when the provider is `raft_engine`**. | +| `wal.purge_interval` | String | `1m` | The interval to trigger a flush.
**It's only used when the provider is `raft_engine`**. | | `wal.read_batch_size` | Integer | `128` | The read batch size.
**It's only used when the provider is `raft_engine`**. | | `wal.sync_write` | Bool | `false` | Whether to use sync write.
**It's only used when the provider is `raft_engine`**. | | `wal.enable_log_recycle` | Bool | `true` | Whether to reuse logically truncated log files.
**It's only used when the provider is `raft_engine`**. | @@ -289,13 +289,13 @@ | `store_addrs` | Array | -- | Store server address default to etcd store. | | `selector` | String | `round_robin` | Datanode selector type.
- `round_robin` (default value)
- `lease_based`
- `load_based`
For details, please see "https://docs.greptime.com/developer-guide/metasrv/selector". | | `use_memory_store` | Bool | `false` | Store data in memory. | -| `enable_telemetry` | Bool | `true` | Whether to enable greptimedb telemetry. | | `store_key_prefix` | String | `""` | If it's not empty, the metasrv will store all data with this key prefix. | | `enable_region_failover` | Bool | `false` | Whether to enable region failover.
This feature is only available on GreptimeDB running on cluster mode and
- Using Remote WAL
- Using shared storage (e.g., s3). | | `backend` | String | `EtcdStore` | The datastore for meta server. | | `runtime` | -- | -- | The runtime options. | | `runtime.global_rt_size` | Integer | `8` | The number of threads to execute the runtime for global read operations. | | `runtime.compact_rt_size` | Integer | `4` | The number of threads to execute the runtime for global write operations. | +| `runtime.enable_telemetry` | Bool | `true` | Whether to enable greptimedb telemetry. Enabled by default. | | `procedure` | -- | -- | Procedure storage options. | | `procedure.max_retry_times` | Integer | `12` | Procedure max retry time. | | `procedure.retry_delay` | String | `500ms` | Initial retry delay of procedures, increases exponentially | @@ -357,7 +357,6 @@ | `node_id` | Integer | Unset | The datanode identifier and should be unique in the cluster. | | `require_lease_before_startup` | Bool | `false` | Start services after regions have obtained leases.
It will block the datanode start if it can't receive leases in the heartbeat from metasrv. | | `init_regions_in_background` | Bool | `false` | Initialize all regions in the background during the startup.
By default, it provides services after all regions have been initialized. | -| `enable_telemetry` | Bool | `true` | Enable telemetry to collect anonymous usage data. | | `init_regions_parallelism` | Integer | `16` | Parallelism of initializing regions. | | `max_concurrent_queries` | Integer | `0` | The maximum current queries allowed to be executed. Zero means unlimited. | | `rpc_addr` | String | Unset | Deprecated, use `grpc.addr` instead. | @@ -365,6 +364,7 @@ | `rpc_runtime_size` | Integer | Unset | Deprecated, use `grpc.runtime_size` instead. | | `rpc_max_recv_message_size` | String | Unset | Deprecated, use `grpc.rpc_max_recv_message_size` instead. | | `rpc_max_send_message_size` | String | Unset | Deprecated, use `grpc.rpc_max_send_message_size` instead. | +| `enable_telemetry` | Bool | `true` | Enable telemetry to collect anonymous usage data. Enabled by default. | | `http` | -- | -- | The HTTP server options. | | `http.addr` | String | `127.0.0.1:4000` | The address to bind the HTTP server. | | `http.timeout` | String | `30s` | HTTP request timeout. Set to 0 to disable timeout. | @@ -400,8 +400,8 @@ | `wal.provider` | String | `raft_engine` | The provider of the WAL.
- `raft_engine`: the wal is stored in the local file system by raft-engine.
- `kafka`: it's remote wal that data is stored in Kafka. | | `wal.dir` | String | Unset | The directory to store the WAL files.
**It's only used when the provider is `raft_engine`**. | | `wal.file_size` | String | `256MB` | The size of the WAL segment file.
**It's only used when the provider is `raft_engine`**. | -| `wal.purge_threshold` | String | `4GB` | The threshold of the WAL size to trigger a flush.
**It's only used when the provider is `raft_engine`**. | -| `wal.purge_interval` | String | `10m` | The interval to trigger a flush.
**It's only used when the provider is `raft_engine`**. | +| `wal.purge_threshold` | String | `1GB` | The threshold of the WAL size to trigger a flush.
**It's only used when the provider is `raft_engine`**. | +| `wal.purge_interval` | String | `1m` | The interval to trigger a flush.
**It's only used when the provider is `raft_engine`**. | | `wal.read_batch_size` | Integer | `128` | The read batch size.
**It's only used when the provider is `raft_engine`**. | | `wal.sync_write` | Bool | `false` | Whether to use sync write.
**It's only used when the provider is `raft_engine`**. | | `wal.enable_log_recycle` | Bool | `true` | Whether to reuse logically truncated log files.
**It's only used when the provider is `raft_engine`**. | diff --git a/config/datanode.example.toml b/config/datanode.example.toml index c5fdd24ebe14..5e4ddabb53dd 100644 --- a/config/datanode.example.toml +++ b/config/datanode.example.toml @@ -13,9 +13,6 @@ require_lease_before_startup = false ## By default, it provides services after all regions have been initialized. init_regions_in_background = false -## Enable telemetry to collect anonymous usage data. -enable_telemetry = true - ## Parallelism of initializing regions. init_regions_parallelism = 16 @@ -42,6 +39,8 @@ rpc_max_recv_message_size = "512MB" ## @toml2docs:none-default rpc_max_send_message_size = "512MB" +## Enable telemetry to collect anonymous usage data. Enabled by default. +#+ enable_telemetry = true ## The HTTP server options. [http] @@ -147,11 +146,11 @@ file_size = "256MB" ## The threshold of the WAL size to trigger a flush. ## **It's only used when the provider is `raft_engine`**. -purge_threshold = "4GB" +purge_threshold = "1GB" ## The interval to trigger a flush. ## **It's only used when the provider is `raft_engine`**. -purge_interval = "10m" +purge_interval = "1m" ## The read batch size. ## **It's only used when the provider is `raft_engine`**. diff --git a/config/metasrv.example.toml b/config/metasrv.example.toml index bcd7ee41412b..2468a3a4fb78 100644 --- a/config/metasrv.example.toml +++ b/config/metasrv.example.toml @@ -20,9 +20,6 @@ selector = "round_robin" ## Store data in memory. use_memory_store = false -## Whether to enable greptimedb telemetry. -enable_telemetry = true - ## If it's not empty, the metasrv will store all data with this key prefix. store_key_prefix = "" @@ -42,6 +39,9 @@ backend = "EtcdStore" ## The number of threads to execute the runtime for global write operations. #+ compact_rt_size = 4 +## Whether to enable greptimedb telemetry. Enabled by default. +#+ enable_telemetry = true + ## Procedure storage options. 
[procedure] diff --git a/config/standalone.example.toml b/config/standalone.example.toml index deaf8900f213..e9bf8246ac8e 100644 --- a/config/standalone.example.toml +++ b/config/standalone.example.toml @@ -1,9 +1,6 @@ ## The running mode of the datanode. It can be `standalone` or `distributed`. mode = "standalone" -## Enable telemetry to collect anonymous usage data. -enable_telemetry = true - ## The default timezone of the server. ## @toml2docs:none-default default_timezone = "UTC" @@ -25,6 +22,9 @@ max_concurrent_queries = 0 ## The number of threads to execute the runtime for global write operations. #+ compact_rt_size = 4 +## Enable telemetry to collect anonymous usage data. Enabled by default. +#+ enable_telemetry = true + ## The HTTP server options. [http] ## The address to bind the HTTP server. @@ -151,11 +151,11 @@ file_size = "256MB" ## The threshold of the WAL size to trigger a flush. ## **It's only used when the provider is `raft_engine`**. -purge_threshold = "4GB" +purge_threshold = "1GB" ## The interval to trigger a flush. ## **It's only used when the provider is `raft_engine`**. -purge_interval = "10m" +purge_interval = "1m" ## The read batch size. ## **It's only used when the provider is `raft_engine`**. 
diff --git a/src/common/wal/src/config/raft_engine.rs b/src/common/wal/src/config/raft_engine.rs index af5daa9d386d..15adf195c4b2 100644 --- a/src/common/wal/src/config/raft_engine.rs +++ b/src/common/wal/src/config/raft_engine.rs @@ -50,8 +50,8 @@ impl Default for RaftEngineConfig { Self { dir: None, file_size: ReadableSize::mb(256), - purge_threshold: ReadableSize::gb(4), - purge_interval: Duration::from_secs(600), + purge_threshold: ReadableSize::gb(1), + purge_interval: Duration::from_secs(60), read_batch_size: 128, sync_write: false, enable_log_recycle: true, diff --git a/tests-integration/tests/http.rs b/tests-integration/tests/http.rs index 106c906372b2..42b5c441f8fb 100644 --- a/tests-integration/tests/http.rs +++ b/tests-integration/tests/http.rs @@ -882,8 +882,8 @@ with_metric_engine = true [wal] provider = "raft_engine" file_size = "256MiB" -purge_threshold = "4GiB" -purge_interval = "10m" +purge_threshold = "1GiB" +purge_interval = "1m" read_batch_size = 128 sync_write = false enable_log_recycle = true From 24c66d0687aad591e5b643dc4fd28368c27315c7 Mon Sep 17 00:00:00 2001 From: Dennis Zhuang Date: Fri, 6 Dec 2024 15:02:44 +0800 Subject: [PATCH 2/3] fix: config --- config/config.md | 8 ++++---- config/metasrv.example.toml | 16 ++++++++-------- config/standalone.example.toml | 6 +++--- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/config/config.md b/config/config.md index f366ddb02aea..caa7dbf64aee 100644 --- a/config/config.md +++ b/config/config.md @@ -17,10 +17,10 @@ | `init_regions_in_background` | Bool | `false` | Initialize all regions in the background during the startup.
By default, it provides services after all regions have been initialized. | | `init_regions_parallelism` | Integer | `16` | Parallelism of initializing regions. | | `max_concurrent_queries` | Integer | `0` | The maximum current queries allowed to be executed. Zero means unlimited. | +| `enable_telemetry` | Bool | `true` | Enable telemetry to collect anonymous usage data. Enabled by default. | | `runtime` | -- | -- | The runtime options. | | `runtime.global_rt_size` | Integer | `8` | The number of threads to execute the runtime for global read operations. | | `runtime.compact_rt_size` | Integer | `4` | The number of threads to execute the runtime for global write operations. | -| `runtime.enable_telemetry` | Bool | `true` | Enable telemetry to collect anonymous usage data. Enabled by default. | | `http` | -- | -- | The HTTP server options. | | `http.addr` | String | `127.0.0.1:4000` | The address to bind the HTTP server. | | `http.timeout` | String | `30s` | HTTP request timeout. Set to 0 to disable timeout. | @@ -287,15 +287,15 @@ | `bind_addr` | String | `127.0.0.1:3002` | The bind address of metasrv. | | `server_addr` | String | `127.0.0.1:3002` | The communication server address for frontend and datanode to connect to metasrv, "127.0.0.1:3002" by default for localhost. | | `store_addrs` | Array | -- | Store server address default to etcd store. | +| `store_key_prefix` | String | `""` | If it's not empty, the metasrv will store all data with this key prefix. | +| `backend` | String | `EtcdStore` | The datastore for meta server. | | `selector` | String | `round_robin` | Datanode selector type.
- `round_robin` (default value)
- `lease_based`
- `load_based`
For details, please see "https://docs.greptime.com/developer-guide/metasrv/selector". | | `use_memory_store` | Bool | `false` | Store data in memory. | -| `store_key_prefix` | String | `""` | If it's not empty, the metasrv will store all data with this key prefix. | | `enable_region_failover` | Bool | `false` | Whether to enable region failover.
This feature is only available on GreptimeDB running on cluster mode and
- Using Remote WAL
- Using shared storage (e.g., s3). | -| `backend` | String | `EtcdStore` | The datastore for meta server. | +| `enable_telemetry` | Bool | `true` | Whether to enable greptimedb telemetry. Enabled by default. | | `runtime` | -- | -- | The runtime options. | | `runtime.global_rt_size` | Integer | `8` | The number of threads to execute the runtime for global read operations. | | `runtime.compact_rt_size` | Integer | `4` | The number of threads to execute the runtime for global write operations. | -| `runtime.enable_telemetry` | Bool | `true` | Whether to enable greptimedb telemetry. Enabled by default. | | `procedure` | -- | -- | Procedure storage options. | | `procedure.max_retry_times` | Integer | `12` | Procedure max retry time. | | `procedure.retry_delay` | String | `500ms` | Initial retry delay of procedures, increases exponentially | diff --git a/config/metasrv.example.toml b/config/metasrv.example.toml index 2468a3a4fb78..27716b5aa37b 100644 --- a/config/metasrv.example.toml +++ b/config/metasrv.example.toml @@ -10,6 +10,12 @@ server_addr = "127.0.0.1:3002" ## Store server address default to etcd store. store_addrs = ["127.0.0.1:2379"] +## If it's not empty, the metasrv will store all data with this key prefix. +store_key_prefix = "" + +## The datastore for meta server. +backend = "EtcdStore" + ## Datanode selector type. ## - `round_robin` (default value) ## - `lease_based` @@ -20,17 +26,14 @@ selector = "round_robin" ## Store data in memory. use_memory_store = false -## If it's not empty, the metasrv will store all data with this key prefix. -store_key_prefix = "" - ## Whether to enable region failover. ## This feature is only available on GreptimeDB running on cluster mode and ## - Using Remote WAL ## - Using shared storage (e.g., s3). enable_region_failover = false -## The datastore for meta server. -backend = "EtcdStore" +## Whether to enable greptimedb telemetry. Enabled by default. +#+ enable_telemetry = true ## The runtime options. 
#+ [runtime] @@ -39,9 +42,6 @@ backend = "EtcdStore" ## The number of threads to execute the runtime for global write operations. #+ compact_rt_size = 4 -## Whether to enable greptimedb telemetry. Enabled by default. -#+ enable_telemetry = true - ## Procedure storage options. [procedure] diff --git a/config/standalone.example.toml b/config/standalone.example.toml index e9bf8246ac8e..137a73c55e97 100644 --- a/config/standalone.example.toml +++ b/config/standalone.example.toml @@ -15,6 +15,9 @@ init_regions_parallelism = 16 ## The maximum current queries allowed to be executed. Zero means unlimited. max_concurrent_queries = 0 +## Enable telemetry to collect anonymous usage data. Enabled by default. +#+ enable_telemetry = true + ## The runtime options. #+ [runtime] ## The number of threads to execute the runtime for global read operations. @@ -22,9 +25,6 @@ max_concurrent_queries = 0 ## The number of threads to execute the runtime for global write operations. #+ compact_rt_size = 4 -## Enable telemetry to collect anonymous usage data. Enabled by default. -#+ enable_telemetry = true - ## The HTTP server options. [http] ## The address to bind the HTTP server. From a87ea061652677d3dfc867ae4e14efa7fe6f6f86 Mon Sep 17 00:00:00 2001 From: Dennis Zhuang Date: Fri, 6 Dec 2024 19:20:02 +0800 Subject: [PATCH 3/3] feat: change raft engine file_size default to 128MiB --- config/config.md | 4 ++-- config/datanode.example.toml | 2 +- config/standalone.example.toml | 2 +- src/common/wal/src/config/raft_engine.rs | 2 +- tests-integration/tests/http.rs | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/config/config.md b/config/config.md index caa7dbf64aee..765fd8dd87b4 100644 --- a/config/config.md +++ b/config/config.md @@ -61,7 +61,7 @@ | `wal` | -- | -- | The WAL options. | | `wal.provider` | String | `raft_engine` | The provider of the WAL.
- `raft_engine`: the wal is stored in the local file system by raft-engine.
- `kafka`: it's remote wal that data is stored in Kafka. | | `wal.dir` | String | Unset | The directory to store the WAL files.
**It's only used when the provider is `raft_engine`**. | -| `wal.file_size` | String | `256MB` | The size of the WAL segment file.
**It's only used when the provider is `raft_engine`**. | +| `wal.file_size` | String | `128MB` | The size of the WAL segment file.
**It's only used when the provider is `raft_engine`**. | | `wal.purge_threshold` | String | `1GB` | The threshold of the WAL size to trigger a flush.
**It's only used when the provider is `raft_engine`**. | | `wal.purge_interval` | String | `1m` | The interval to trigger a flush.
**It's only used when the provider is `raft_engine`**. | | `wal.read_batch_size` | Integer | `128` | The read batch size.
**It's only used when the provider is `raft_engine`**. | @@ -399,7 +399,7 @@ | `wal` | -- | -- | The WAL options. | | `wal.provider` | String | `raft_engine` | The provider of the WAL.
- `raft_engine`: the wal is stored in the local file system by raft-engine.
- `kafka`: it's remote wal that data is stored in Kafka. | | `wal.dir` | String | Unset | The directory to store the WAL files.
**It's only used when the provider is `raft_engine`**. | -| `wal.file_size` | String | `256MB` | The size of the WAL segment file.
**It's only used when the provider is `raft_engine`**. | +| `wal.file_size` | String | `128MB` | The size of the WAL segment file.
**It's only used when the provider is `raft_engine`**. | | `wal.purge_threshold` | String | `1GB` | The threshold of the WAL size to trigger a flush.
**It's only used when the provider is `raft_engine`**. | | `wal.purge_interval` | String | `1m` | The interval to trigger a flush.
**It's only used when the provider is `raft_engine`**. | | `wal.read_batch_size` | Integer | `128` | The read batch size.
**It's only used when the provider is `raft_engine`**. | diff --git a/config/datanode.example.toml b/config/datanode.example.toml index 5e4ddabb53dd..088980218891 100644 --- a/config/datanode.example.toml +++ b/config/datanode.example.toml @@ -142,7 +142,7 @@ dir = "/tmp/greptimedb/wal" ## The size of the WAL segment file. ## **It's only used when the provider is `raft_engine`**. -file_size = "256MB" +file_size = "128MB" ## The threshold of the WAL size to trigger a flush. ## **It's only used when the provider is `raft_engine`**. diff --git a/config/standalone.example.toml b/config/standalone.example.toml index 137a73c55e97..480286349427 100644 --- a/config/standalone.example.toml +++ b/config/standalone.example.toml @@ -147,7 +147,7 @@ dir = "/tmp/greptimedb/wal" ## The size of the WAL segment file. ## **It's only used when the provider is `raft_engine`**. -file_size = "256MB" +file_size = "128MB" ## The threshold of the WAL size to trigger a flush. ## **It's only used when the provider is `raft_engine`**. diff --git a/src/common/wal/src/config/raft_engine.rs b/src/common/wal/src/config/raft_engine.rs index 15adf195c4b2..cfefd0c758b7 100644 --- a/src/common/wal/src/config/raft_engine.rs +++ b/src/common/wal/src/config/raft_engine.rs @@ -49,7 +49,7 @@ impl Default for RaftEngineConfig { fn default() -> Self { Self { dir: None, - file_size: ReadableSize::mb(256), + file_size: ReadableSize::mb(128), purge_threshold: ReadableSize::gb(1), purge_interval: Duration::from_secs(60), read_batch_size: 128, diff --git a/tests-integration/tests/http.rs b/tests-integration/tests/http.rs index 42b5c441f8fb..056cdc5481d2 100644 --- a/tests-integration/tests/http.rs +++ b/tests-integration/tests/http.rs @@ -881,7 +881,7 @@ with_metric_engine = true [wal] provider = "raft_engine" -file_size = "256MiB" +file_size = "128MiB" purge_threshold = "1GiB" purge_interval = "1m" read_batch_size = 128