diff --git a/config/datanode.example.toml b/config/datanode.example.toml index a0cc3601906e..db7777af0fcf 100644 --- a/config/datanode.example.toml +++ b/config/datanode.example.toml @@ -101,3 +101,17 @@ parallel_scan_channel_size = 32 # [logging] # dir = "/tmp/greptimedb/logs" # level = "info" + +# The datanode exports the metrics it generates, +# encoded in the Prometheus remote-write format, +# and sends them to a Prometheus remote-write compatible receiver (e.g. `greptimedb` itself). +# This is only used for `greptimedb` to export its own metrics internally. Please see the `logging` option for normal export of metrics. +# [system_metric] +# Whether to enable exporting system metrics; the default is false. +# enable = false +# The address (`host:port`, not a full URL) of the metric export endpoint; the default is the default `greptimedb` frontend endpoint. +# endpoint = "127.0.0.1:4000" +# The name of the database in which exported metrics are stored; it must be a valid, non-empty database name when export is enabled. +# db = "" +# The interval at which metrics are exported. +# write_interval = "30s" diff --git a/config/frontend.example.toml b/config/frontend.example.toml index 566ed42f9ecf..e828cee4f74e 100644 --- a/config/frontend.example.toml +++ b/config/frontend.example.toml @@ -77,3 +77,17 @@ tcp_nodelay = true timeout = "10s" connect_timeout = "10s" tcp_nodelay = true + +# The frontend exports the metrics it generates, +# encoded in the Prometheus remote-write format, +# and sends them to a Prometheus remote-write compatible receiver (e.g. `greptimedb` itself). +# This is only used for `greptimedb` to export its own metrics internally. Please see the `logging` option for normal export of metrics. 
+# [system_metric] +# Whether to enable exporting system metrics; the default is false. +# enable = false +# The address (`host:port`, not a full URL) of the metric export endpoint; the default is the default `greptimedb` frontend endpoint. +# endpoint = "127.0.0.1:4000" +# The name of the database in which exported metrics are stored; it must be a valid, non-empty database name when export is enabled. +# db = "" +# The interval at which metrics are exported. +# write_interval = "30s" diff --git a/config/metasrv.example.toml b/config/metasrv.example.toml index aad33ce1afcf..3fd388390072 100644 --- a/config/metasrv.example.toml +++ b/config/metasrv.example.toml @@ -66,3 +66,17 @@ provider = "raft_engine" # num_partitions = 1 # Expected number of replicas of each partition. # replication_factor = 3 + +# The metasrv exports the metrics it generates, +# encoded in the Prometheus remote-write format, +# and sends them to a Prometheus remote-write compatible receiver (e.g. `greptimedb` itself). +# This is only used for `greptimedb` to export its own metrics internally. Please see the `logging` option for normal export of metrics. +# [system_metric] +# Whether to enable exporting system metrics; the default is false. +# enable = false +# The address (`host:port`, not a full URL) of the metric export endpoint; the default is the default `greptimedb` frontend endpoint. +# endpoint = "127.0.0.1:4000" +# The name of the database in which exported metrics are stored; it must be a valid, non-empty database name when export is enabled. +# db = "" +# The interval at which metrics are exported. +# write_interval = "30s" diff --git a/config/standalone.example.toml b/config/standalone.example.toml index 1521aac44ebf..0f235059d7d8 100644 --- a/config/standalone.example.toml +++ b/config/standalone.example.toml @@ -178,14 +178,16 @@ parallel_scan_channel_size = 32 # The percentage of tracing will be sampled and exported. Valid range `[0, 1]`, 1 means all traces are sampled, 0 means all traces are not sampled, the default value is 1. ratio > 1 are treated as 1. 
Fractions < 0 are treated as 0 # tracing_sample_ratio = 1.0 -# standalone/frontend/datanode/metasrv export the metrics generated by itself -# send metrics to Prometheus remote-write compatible receiver (e.g. `greptimedb`) -# [remote_write] -# whether enable export remote_write, default is false +# Standalone exports the metrics it generates, +# encoded in the Prometheus remote-write format, +# and sends them to a Prometheus remote-write compatible receiver (e.g. `greptimedb` itself). +# This is only used for `greptimedb` to export its own metrics internally. Please see the `logging` option for normal export of metrics. +# [system_metric] +# Whether to enable exporting system metrics; the default is false. # enable = false -# The url of remote write endpoint. -# Taking greptimedb as an example, for `standalone` deployed under the default configuration. -# The user can create a database called `system` in the db and export the metric to `http://127.0.0.1:4000/v1/prometheus/write?db=system` -# endpoint = "http://127.0.0.1:4000/v1/prometheus/write?db=system" -# The interval of export metric, +# The address (`host:port`, not a full URL) of the metric export endpoint; the default is the default `greptimedb` frontend endpoint. +# endpoint = "127.0.0.1:4000" +# The name of the database in which exported metrics are stored; it must be a valid, non-empty database name when export is enabled. +# db = "" +# The interval at which metrics are exported. # write_interval = "30s" diff --git a/src/cmd/src/frontend.rs b/src/cmd/src/frontend.rs index beebf834f900..d6b1210104b1 100644 --- a/src/cmd/src/frontend.rs +++ b/src/cmd/src/frontend.rs @@ -250,7 +250,7 @@ impl StartCommand { .context(StartFrontendSnafu)?; instance - .build_remote_write_metric_task(&opts.remote_write) + .build_system_metric_task(&opts.system_metric) .context(StartFrontendSnafu)?; instance diff --git a/src/cmd/src/standalone.rs b/src/cmd/src/standalone.rs index 91e546d67526..f88991377d21 100644 --- a/src/cmd/src/standalone.rs +++ b/src/cmd/src/standalone.rs @@ -45,7 +45,7 @@ use frontend::service_config::{ use mito2::config::MitoConfig; 
use serde::{Deserialize, Serialize}; use servers::http::HttpOptions; -use servers::remote_writer::RemoteWriteOptions; +use servers::system_metric::SystemMetricOption; use servers::tls::{TlsMode, TlsOption}; use servers::Mode; use snafu::ResultExt; @@ -113,7 +113,7 @@ pub struct StandaloneOptions { pub user_provider: Option, /// Options for different store engines. pub region_engine: Vec, - pub remote_write: RemoteWriteOptions, + pub system_metric: SystemMetricOption, } impl Default for StandaloneOptions { @@ -133,7 +133,7 @@ impl Default for StandaloneOptions { metadata_store: KvBackendConfig::default(), procedure: ProcedureConfig::default(), logging: LoggingOptions::default(), - remote_write: RemoteWriteOptions::default(), + system_metric: SystemMetricOption::default(), user_provider: None, region_engine: vec![ RegionEngineConfig::Mito(MitoConfig::default()), @@ -157,8 +157,8 @@ impl StandaloneOptions { meta_client: None, logging: self.logging, user_provider: self.user_provider, - // Handle the remote write metric task run by standalone to frontend for execution - remote_write: self.remote_write, + // Handle the system metric task run by standalone to frontend for execution + system_metric: self.system_metric, ..Default::default() } } @@ -405,7 +405,7 @@ impl StartCommand { .context(StartFrontendSnafu)?; frontend - .build_remote_write_metric_task(&opts.frontend.remote_write) + .build_system_metric_task(&opts.frontend.system_metric) .context(StartFrontendSnafu)?; frontend diff --git a/src/common/telemetry/src/metric.rs b/src/common/telemetry/src/metric.rs index 9dc5f60171e9..666b0546503f 100644 --- a/src/common/telemetry/src/metric.rs +++ b/src/common/telemetry/src/metric.rs @@ -14,7 +14,6 @@ use std::sync::Arc; -// metric stuffs, inspired by databend use greptime_proto::prometheus::remote::{Sample, TimeSeries}; use greptime_proto::prometheus::*; use prometheus::proto::{LabelPair, MetricFamily, MetricType}; diff --git a/src/datanode/src/config.rs 
b/src/datanode/src/config.rs index 12ebc71a2d52..44fa5e7eb813 100644 --- a/src/datanode/src/config.rs +++ b/src/datanode/src/config.rs @@ -30,7 +30,7 @@ use secrecy::SecretString; use serde::{Deserialize, Serialize}; use servers::heartbeat_options::HeartbeatOptions; use servers::http::HttpOptions; -use servers::remote_writer::RemoteWriteOptions; +use servers::system_metric::SystemMetricOption; use servers::Mode; pub const DEFAULT_OBJECT_STORE_CACHE_SIZE: ReadableSize = ReadableSize::mb(256); @@ -242,7 +242,7 @@ pub struct DatanodeOptions { pub region_engine: Vec, pub logging: LoggingOptions, pub enable_telemetry: bool, - pub remote_write: RemoteWriteOptions, + pub system_metric: SystemMetricOption, } impl Default for DatanodeOptions { @@ -267,7 +267,7 @@ impl Default for DatanodeOptions { logging: LoggingOptions::default(), heartbeat: HeartbeatOptions::datanode_default(), enable_telemetry: true, - remote_write: RemoteWriteOptions::default(), + system_metric: SystemMetricOption::default(), } } } diff --git a/src/datanode/src/datanode.rs b/src/datanode/src/datanode.rs index 2da4ee3e98f3..be7e5b7d416f 100644 --- a/src/datanode/src/datanode.rs +++ b/src/datanode/src/datanode.rs @@ -42,8 +42,8 @@ use query::QueryEngineFactory; use servers::grpc::{GrpcServer, GrpcServerConfig}; use servers::http::HttpServerBuilder; use servers::metrics_handler::MetricsHandler; -use servers::remote_writer::RemoteWriteMetricTask; use servers::server::{start_server, ServerHandler, ServerHandlers}; +use servers::system_metric::SystemMetricTask; use servers::Mode; use snafu::{OptionExt, ResultExt}; use store_api::logstore::LogStore; @@ -82,7 +82,7 @@ pub struct Datanode { greptimedb_telemetry_task: Arc, leases_notifier: Option>, plugins: Plugins, - remote_write_metric_task: Option, + system_metric_task: Option, } impl Datanode { @@ -94,7 +94,7 @@ impl Datanode { self.start_telemetry(); - if let Some(t) = self.remote_write_metric_task.as_ref() { + if let Some(t) = 
self.system_metric_task.as_ref() { t.start() } @@ -265,8 +265,8 @@ impl DatanodeBuilder { None }; - let remote_write_metric_task = - RemoteWriteMetricTask::try_new(&self.opts.remote_write, Some(&self.plugins)) + let system_metric_task = + SystemMetricTask::try_new(&self.opts.system_metric, Some(&self.plugins)) .context(StartServerSnafu)?; Ok(Datanode { @@ -277,7 +277,7 @@ impl DatanodeBuilder { region_event_receiver, leases_notifier, plugins: self.plugins.clone(), - remote_write_metric_task, + system_metric_task, }) } diff --git a/src/frontend/src/frontend.rs b/src/frontend/src/frontend.rs index b1928df87819..d0dd0391a4eb 100644 --- a/src/frontend/src/frontend.rs +++ b/src/frontend/src/frontend.rs @@ -17,7 +17,7 @@ use meta_client::MetaClientOptions; use serde::{Deserialize, Serialize}; use servers::heartbeat_options::HeartbeatOptions; use servers::http::HttpOptions; -use servers::remote_writer::RemoteWriteOptions; +use servers::system_metric::SystemMetricOption; use servers::Mode; use snafu::prelude::*; @@ -45,7 +45,7 @@ pub struct FrontendOptions { pub logging: LoggingOptions, pub datanode: DatanodeOptions, pub user_provider: Option, - pub remote_write: RemoteWriteOptions, + pub system_metric: SystemMetricOption, } impl Default for FrontendOptions { @@ -66,7 +66,7 @@ impl Default for FrontendOptions { logging: LoggingOptions::default(), datanode: DatanodeOptions::default(), user_provider: None, - remote_write: RemoteWriteOptions::default(), + system_metric: SystemMetricOption::default(), } } } diff --git a/src/frontend/src/instance.rs b/src/frontend/src/instance.rs index e76dee112daa..23acc9de7f7a 100644 --- a/src/frontend/src/instance.rs +++ b/src/frontend/src/instance.rs @@ -65,8 +65,8 @@ use servers::query_handler::{ InfluxdbLineProtocolHandler, OpenTelemetryProtocolHandler, OpentsdbProtocolHandler, PromStoreProtocolHandler, ScriptHandler, }; -use servers::remote_writer::{RemoteWriteMetricTask, RemoteWriteOptions}; use servers::server::{start_server, 
ServerHandlers}; +use servers::system_metric::{SystemMetricOption, SystemMetricTask}; use session::context::QueryContextRef; use snafu::prelude::*; use sql::dialect::Dialect; @@ -118,7 +118,7 @@ pub struct Instance { heartbeat_task: Option, inserter: InserterRef, deleter: DeleterRef, - remote_write_metric_task: Option, + system_metric_task: Option, } impl Instance { @@ -196,9 +196,9 @@ impl Instance { Ok(()) } - pub fn build_remote_write_metric_task(&mut self, opts: &RemoteWriteOptions) -> Result<()> { - self.remote_write_metric_task = - RemoteWriteMetricTask::try_new(opts, Some(&self.plugins)).context(StartServerSnafu)?; + pub fn build_system_metric_task(&mut self, opts: &SystemMetricOption) -> Result<()> { + self.system_metric_task = + SystemMetricTask::try_new(opts, Some(&self.plugins)).context(StartServerSnafu)?; Ok(()) } @@ -231,7 +231,7 @@ impl FrontendInstance for Instance { self.script_executor.start(self)?; - if let Some(t) = self.remote_write_metric_task.as_ref() { + if let Some(t) = self.system_metric_task.as_ref() { t.start() } diff --git a/src/frontend/src/instance/builder.rs b/src/frontend/src/instance/builder.rs index 550f6fc4251b..afdee140f25d 100644 --- a/src/frontend/src/instance/builder.rs +++ b/src/frontend/src/instance/builder.rs @@ -144,7 +144,7 @@ impl FrontendBuilder { heartbeat_task: self.heartbeat_task, inserter, deleter, - remote_write_metric_task: None, + system_metric_task: None, }) } } diff --git a/src/meta-srv/src/bootstrap.rs b/src/meta-srv/src/bootstrap.rs index 2a8f47803461..7a354e67ae3e 100644 --- a/src/meta-srv/src/bootstrap.rs +++ b/src/meta-srv/src/bootstrap.rs @@ -28,8 +28,8 @@ use etcd_client::Client; use servers::configurator::ConfiguratorRef; use servers::http::{HttpServer, HttpServerBuilder}; use servers::metrics_handler::MetricsHandler; -use servers::remote_writer::RemoteWriteMetricTask; use servers::server::Server; +use servers::system_metric::SystemMetricTask; use snafu::ResultExt; use tokio::net::TcpListener; use 
tokio::select; @@ -60,7 +60,7 @@ pub struct MetaSrvInstance { plugins: Plugins, - remote_write_metric_task: Option, + system_metric_task: Option, } impl MetaSrvInstance { @@ -77,23 +77,22 @@ impl MetaSrvInstance { ); // put meta_srv into plugins for later use plugins.insert::>(Arc::new(meta_srv.clone())); - let remote_write_metric_task = - RemoteWriteMetricTask::try_new(&opts.remote_write, Some(&plugins)) - .context(InitRemoteWriteMetricTaskSnafu)?; + let system_metric_task = SystemMetricTask::try_new(&opts.system_metric, Some(&plugins)) + .context(InitRemoteWriteMetricTaskSnafu)?; Ok(MetaSrvInstance { meta_srv, http_srv, opts, signal_sender: None, plugins, - remote_write_metric_task, + system_metric_task, }) } pub async fn start(&mut self) -> Result<()> { self.meta_srv.try_start().await?; - if let Some(t) = self.remote_write_metric_task.as_ref() { + if let Some(t) = self.system_metric_task.as_ref() { t.start() } diff --git a/src/meta-srv/src/metasrv.rs b/src/meta-srv/src/metasrv.rs index 44eff939f101..2a136a6729bb 100644 --- a/src/meta-srv/src/metasrv.rs +++ b/src/meta-srv/src/metasrv.rs @@ -33,7 +33,7 @@ use common_telemetry::logging::LoggingOptions; use common_telemetry::{error, info, warn}; use serde::{Deserialize, Serialize}; use servers::http::HttpOptions; -use servers::remote_writer::RemoteWriteOptions; +use servers::system_metric::SystemMetricOption; use snafu::ResultExt; use table::metadata::TableId; use tokio::sync::broadcast::error::RecvError; @@ -73,7 +73,7 @@ pub struct MetaSrvOptions { pub enable_telemetry: bool, pub data_home: String, pub wal: WalConfig, - pub remote_write: RemoteWriteOptions, + pub system_metric: SystemMetricOption, } impl Default for MetaSrvOptions { @@ -99,7 +99,7 @@ impl Default for MetaSrvOptions { enable_telemetry: true, data_home: METASRV_HOME.to_string(), wal: WalConfig::default(), - remote_write: RemoteWriteOptions::default(), + system_metric: SystemMetricOption::default(), } } } diff --git a/src/servers/src/lib.rs 
b/src/servers/src/lib.rs index c08003b4481d..7397164b5842 100644 --- a/src/servers/src/lib.rs +++ b/src/servers/src/lib.rs @@ -37,10 +37,10 @@ pub mod postgres; pub mod prom_store; pub mod prometheus_handler; pub mod query_handler; -pub mod remote_writer; mod row_writer; pub mod server; mod shutdown; +pub mod system_metric; pub mod tls; #[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)] diff --git a/src/servers/src/remote_writer.rs b/src/servers/src/system_metric.rs similarity index 75% rename from src/servers/src/remote_writer.rs rename to src/servers/src/system_metric.rs index 79dac3742133..f7006af2f2c2 100644 --- a/src/servers/src/remote_writer.rs +++ b/src/servers/src/system_metric.rs @@ -29,31 +29,33 @@ use crate::prom_store::snappy_compress; #[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] #[serde(default)] -pub struct RemoteWriteOptions { +pub struct SystemMetricOption { pub enable: bool, pub endpoint: String, + pub db: String, #[serde(with = "humantime_serde")] pub write_interval: Duration, } -impl Default for RemoteWriteOptions { +impl Default for SystemMetricOption { fn default() -> Self { Self { enable: false, - endpoint: String::new(), + endpoint: "127.0.0.1:4000".to_string(), + db: String::new(), write_interval: Duration::from_secs(30), } } } #[derive(Default, Clone)] -pub struct RemoteWriteMetricTask { - config: RemoteWriteOptions, +pub struct SystemMetricTask { + config: SystemMetricOption, filter: Option, } -impl RemoteWriteMetricTask { - pub fn try_new(config: &RemoteWriteOptions, plugins: Option<&Plugins>) -> Result> { +impl SystemMetricTask { + pub fn try_new(config: &SystemMetricOption, plugins: Option<&Plugins>) -> Result> { if !config.enable { return Ok(None); } @@ -61,7 +63,13 @@ impl RemoteWriteMetricTask { ensure!( config.write_interval.as_secs() != 0, InvalidRemoteWriteConfigSnafu { - msg: "Expected Remote write write_interval greater than zero" + msg: "Expected System metric write_interval greater than zero" + } 
+ ); + ensure!( + !config.db.is_empty(), + InvalidRemoteWriteConfigSnafu { + msg: "Expected System metric db not empty" } ); Ok(Some(Self { @@ -75,24 +83,27 @@ impl RemoteWriteMetricTask { } let mut interval = time::interval(self.config.write_interval); let sec = self.config.write_interval.as_secs(); - let endpoint = self.config.endpoint.clone(); + let endpoint = format!( + "http://{}/v1/prometheus/write?db={}", + self.config.endpoint, self.config.db + ); let filter = self.filter.clone(); let _handle = common_runtime::spawn_bg(async move { info!( - "Start remote write metric task to endpoint: {}, interval: {}s", + "Start system metric task to endpoint: {}, interval: {}s", endpoint, sec ); // Pass the first tick. Because the first tick completes immediately. interval.tick().await; loop { interval.tick().await; - match report_metric(&endpoint, filter.as_ref()).await { + match write_system_metric(&endpoint, filter.as_ref()).await { Ok(resp) => { if !resp.status().is_success() { - error!("report metric in remote write error, msg: {:#?}", resp); + error!("report system metric error, msg: {:#?}", resp); } } - Err(e) => error!("report metric in remote write failed, error {}", e), + Err(e) => error!("report system metric failed, error {}", e), }; } }); @@ -102,7 +113,7 @@ impl RemoteWriteMetricTask { /// Export the collected metrics, encode metrics into [RemoteWrite format](https://prometheus.io/docs/concepts/remote_write_spec/), /// and send metrics to Prometheus remote-write compatible receiver (e.g. `greptimedb`) specified by `url`. 
/// User could use `MetricFilter` to filter metric they don't want collect -pub async fn report_metric(url: &str, filter: Option<&MetricFilter>) -> Result { +pub async fn write_system_metric(url: &str, filter: Option<&MetricFilter>) -> Result { let metric_families = prometheus::gather(); let request = convert_metric_to_write_request( metric_families, diff --git a/tests-integration/tests/http.rs b/tests-integration/tests/http.rs index ef2f6bb823e4..c8ffc91efc5b 100644 --- a/tests-integration/tests/http.rs +++ b/tests-integration/tests/http.rs @@ -753,8 +753,9 @@ timeout = "10s" connect_timeout = "1s" tcp_nodelay = true -[frontend.remote_write] +[frontend.system_metric] enable = false +db = "" write_interval = "30s" [datanode] @@ -811,8 +812,9 @@ parallel_scan_channel_size = 32 [datanode.logging] enable_otlp_tracing = false -[datanode.remote_write] +[datanode.system_metric] enable = false +db = "" write_interval = "30s" [logging]