diff --git a/Cargo.lock b/Cargo.lock index a6a1e0bc2828..b0806d4b4a2b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5265,6 +5265,7 @@ dependencies = [ "common-runtime", "common-telemetry", "common-time", + "common-version", "common-wal", "dashmap", "datatypes", diff --git a/src/catalog/src/error.rs b/src/catalog/src/error.rs index 8391dab045c8..20cac754b9f0 100644 --- a/src/catalog/src/error.rs +++ b/src/catalog/src/error.rs @@ -49,6 +49,12 @@ pub enum Error { source: BoxedError, }, + #[snafu(display("Failed to list nodes in cluster: {source}"))] + ListNodes { + location: Location, + source: BoxedError, + }, + #[snafu(display("Failed to re-compile script due to internal error"))] CompileScriptInternal { location: Location, @@ -294,6 +300,7 @@ impl ErrorExt for Error { } Error::ListCatalogs { source, .. } + | Error::ListNodes { source, .. } | Error::ListSchemas { source, .. } | Error::ListTables { source, .. } => source.status_code(), diff --git a/src/catalog/src/information_schema.rs b/src/catalog/src/information_schema.rs index 8d488212fc45..e482a16e77de 100644 --- a/src/catalog/src/information_schema.rs +++ b/src/catalog/src/information_schema.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +mod cluster_info; pub mod columns; pub mod key_column_usage; mod memory_table; @@ -23,6 +24,7 @@ pub mod schemata; mod table_constraints; mod table_names; pub mod tables; +pub(crate) mod utils; use std::collections::HashMap; use std::sync::{Arc, Weak}; @@ -47,6 +49,7 @@ pub use table_names::*; use self::columns::InformationSchemaColumns; use crate::error::Result; +use crate::information_schema::cluster_info::InformationSchemaClusterInfo; use crate::information_schema::key_column_usage::InformationSchemaKeyColumnUsage; use crate::information_schema::memory_table::{get_schema_columns, MemoryTable}; use crate::information_schema::partitions::InformationSchemaPartitions; @@ -179,6 +182,10 @@ impl InformationSchemaProvider { TABLE_CONSTRAINTS.to_string(), self.build_table(TABLE_CONSTRAINTS).unwrap(), ); + tables.insert( + CLUSTER_INFO.to_string(), + self.build_table(CLUSTER_INFO).unwrap(), + ); // Add memory tables for name in MEMORY_TABLES.iter() { @@ -251,6 +258,9 @@ impl InformationSchemaProvider { self.catalog_name.clone(), self.catalog_manager.clone(), )) as _), + CLUSTER_INFO => Some(Arc::new(InformationSchemaClusterInfo::new( + self.catalog_manager.clone(), + )) as _), _ => None, } } diff --git a/src/catalog/src/information_schema/runtime_metrics.rs b/src/catalog/src/information_schema/runtime_metrics.rs index e8ff1d95cb36..f665b9c80eab 100644 --- a/src/catalog/src/information_schema/runtime_metrics.rs +++ b/src/catalog/src/information_schema/runtime_metrics.rs @@ -45,8 +45,8 @@ pub(super) struct InformationSchemaMetrics { const METRIC_NAME: &str = "metric_name"; const METRIC_VALUE: &str = "value"; const METRIC_LABELS: &str = "labels"; -const NODE: &str = "node"; -const NODE_TYPE: &str = "node_type"; +const PEER_ADDR: &str = "peer_addr"; +const PEER_TYPE: &str = "peer_type"; const TIMESTAMP: &str = "timestamp"; /// The `information_schema.runtime_metrics` virtual table. @@ -63,8 +63,8 @@ impl InformationSchemaMetrics { ColumnSchema::new(METRIC_NAME, ConcreteDataType::string_datatype(), false), ColumnSchema::new(METRIC_VALUE, ConcreteDataType::float64_datatype(), false), ColumnSchema::new(METRIC_LABELS, ConcreteDataType::string_datatype(), true), - ColumnSchema::new(NODE, ConcreteDataType::string_datatype(), true), - ColumnSchema::new(NODE_TYPE, ConcreteDataType::string_datatype(), false), + ColumnSchema::new(PEER_ADDR, ConcreteDataType::string_datatype(), true), + ColumnSchema::new(PEER_TYPE, ConcreteDataType::string_datatype(), false), ColumnSchema::new( TIMESTAMP, ConcreteDataType::timestamp_millisecond_datatype(), @@ -119,8 +119,8 @@ struct InformationSchemaMetricsBuilder { metric_names: StringVectorBuilder, metric_values: Float64VectorBuilder, metric_labels: StringVectorBuilder, - nodes: StringVectorBuilder, - node_types: StringVectorBuilder, + peer_addrs: StringVectorBuilder, + peer_types: StringVectorBuilder, } impl InformationSchemaMetricsBuilder { @@ -130,8 +130,8 @@ impl InformationSchemaMetricsBuilder { metric_names: StringVectorBuilder::with_capacity(42), metric_values: Float64VectorBuilder::with_capacity(42), metric_labels: StringVectorBuilder::with_capacity(42), - nodes: StringVectorBuilder::with_capacity(42), - node_types: StringVectorBuilder::with_capacity(42), + peer_addrs: StringVectorBuilder::with_capacity(42), + peer_types: StringVectorBuilder::with_capacity(42), } } @@ -140,14 +140,14 @@ impl InformationSchemaMetricsBuilder { metric_name: &str, labels: String, metric_value: f64, - node: Option<&str>, - node_type: &str, + peer: Option<&str>, + peer_type: &str, ) { self.metric_names.push(Some(metric_name)); self.metric_values.push(Some(metric_value)); self.metric_labels.push(Some(&labels)); - self.nodes.push(node); - self.node_types.push(Some(node_type)); + self.peer_addrs.push(peer); + self.peer_types.push(Some(peer_type)); } async fn make_metrics(&mut self, _request: Option) -> Result { @@ -184,9 +184,9 @@ impl InformationSchemaMetricsBuilder { .join(", "), // Safety: always has a sample ts.samples[0].value, - // TODO(dennis): fetching other nodes metrics + // TODO(dennis): fetching other peers metrics - // The node column is always `None` for standalone + // The peer column is always `None` for standalone None, "standalone", ); @@ -209,8 +209,8 @@ impl InformationSchemaMetricsBuilder { Arc::new(self.metric_names.finish()), Arc::new(self.metric_values.finish()), Arc::new(self.metric_labels.finish()), - Arc::new(self.nodes.finish()), - Arc::new(self.node_types.finish()), + Arc::new(self.peer_addrs.finish()), + Arc::new(self.peer_types.finish()), timestamps, ]; @@ -247,7 +247,7 @@ mod tests { #[tokio::test] async fn test_make_metrics() { - let metrics = InformationSchemaMetrics::new(Mode::Standalone, None); + let metrics = InformationSchemaMetrics::new(); let stream = metrics.to_stream(ScanRequest::default()).unwrap(); @@ -258,8 +258,8 @@ mod tests { assert!(result_literal.contains(METRIC_NAME)); assert!(result_literal.contains(METRIC_VALUE)); assert!(result_literal.contains(METRIC_LABELS)); - assert!(result_literal.contains(NODE)); - assert!(result_literal.contains(NODE_TYPE)); + assert!(result_literal.contains(PEER_ADDR)); + assert!(result_literal.contains(PEER_TYPE)); assert!(result_literal.contains(TIMESTAMP)); } } diff --git a/src/catalog/src/information_schema/table_names.rs b/src/catalog/src/information_schema/table_names.rs index bed3aae60088..cdc695646a9d 100644 --- a/src/catalog/src/information_schema/table_names.rs +++ b/src/catalog/src/information_schema/table_names.rs @@ -42,3 +42,4 @@ pub const RUNTIME_METRICS: &str = "runtime_metrics"; pub const PARTITIONS: &str = "partitions"; pub const REGION_PEERS: &str = "greptime_region_peers"; pub const TABLE_CONSTRAINTS: &str = "table_constraints"; +pub const CLUSTER_INFO: &str = "cluster_info"; diff --git a/src/common/catalog/src/consts.rs b/src/common/catalog/src/consts.rs index 8834b6239f91..9d0623795738 100644 --- a/src/common/catalog/src/consts.rs +++ b/src/common/catalog/src/consts.rs @@ -88,6 +88,8 @@ pub const INFORMATION_SCHEMA_PARTITIONS_TABLE_ID: u32 = 28; pub const INFORMATION_SCHEMA_REGION_PEERS_TABLE_ID: u32 = 29; /// id for information_schema.columns pub const INFORMATION_SCHEMA_TABLE_CONSTRAINTS_TABLE_ID: u32 = 30; +/// id for information_schema.cluster_info +pub const INFORMATION_SCHEMA_CLUSTER_INFO_TABLE_ID: u32 = 31; /// ----- End of information_schema tables ----- pub const MITO_ENGINE: &str = "mito"; diff --git a/src/common/meta/src/cluster.rs b/src/common/meta/src/cluster.rs index ba3aecea2678..5dc8a640148d 100644 --- a/src/common/meta/src/cluster.rs +++ b/src/common/meta/src/cluster.rs @@ -86,6 +86,10 @@ pub struct NodeInfo { pub last_activity_ts: i64, /// The status of the node. Different roles have different node status. pub status: NodeStatus, + // The node build version + pub version: String, + // The node build git commit hash + pub git_commit: String, } #[derive(Debug, Clone, Eq, Hash, PartialEq, Serialize, Deserialize)] @@ -100,6 +104,19 @@ pub enum NodeStatus { Datanode(DatanodeStatus), Frontend(FrontendStatus), Metasrv(MetasrvStatus), + Standalone, +} + +impl NodeStatus { + // Get the role name of the node status + pub fn role_name(&self) -> &str { + match self { + NodeStatus::Datanode(_) => "DATANODE", + NodeStatus::Frontend(_) => "FRONTEND", + NodeStatus::Metasrv(_) => "METASRV", + NodeStatus::Standalone => "STANDALONE", + } + } } /// The status of a datanode. @@ -271,6 +288,8 @@ mod tests { leader_regions: 3, follower_regions: 4, }), + version: "".to_string(), + git_commit: "".to_string(), }; let node_info_bytes: Vec = node_info.try_into().unwrap(); @@ -287,6 +306,7 @@ mod tests { leader_regions: 3, follower_regions: 4, }), + .. } ); } diff --git a/src/meta-client/src/client.rs b/src/meta-client/src/client.rs index cb8341b3a88a..1a57fe2a51e9 100644 --- a/src/meta-client/src/client.rs +++ b/src/meta-client/src/client.rs @@ -264,6 +264,9 @@ impl ClusterInfo for MetaClient { let mut nodes = if get_metasrv_nodes { let last_activity_ts = -1; // Metasrv does not provide this information. + // TODO(dennis): Get Metasrv build info + let git_commit = "unknown"; + let version = "unknown"; let (leader, followers) = cluster_client.get_metasrv_peers().await?; followers .into_iter() @@ -271,11 +274,15 @@ impl ClusterInfo for MetaClient { peer, last_activity_ts, status: NodeStatus::Metasrv(MetasrvStatus { is_leader: false }), + version: version.to_string(), + git_commit: git_commit.to_string(), }) .chain(leader.into_iter().map(|leader| NodeInfo { peer: leader, last_activity_ts, status: NodeStatus::Metasrv(MetasrvStatus { is_leader: true }), + version: version.to_string(), + git_commit: git_commit.to_string(), })) .collect::>() } else { diff --git a/src/meta-client/src/client/cluster.rs b/src/meta-client/src/client/cluster.rs index 9ece92177aae..56b4d44ce780 100644 --- a/src/meta-client/src/client/cluster.rs +++ b/src/meta-client/src/client/cluster.rs @@ -146,6 +146,7 @@ impl Inner { { let ask_leader = self.ask_leader()?; let mut times = 0; + let mut last_error = None; while times < self.max_retry { if let Some(leader) = &ask_leader.get_leader() { @@ -153,6 +154,7 @@ impl Inner { match body_fn(client).await { Ok(res) => { if util::is_not_leader(get_header(&res)) { + last_error = Some(format!("{leader} is not a leader")); warn!("Failed to {task} to {leader}, not a leader"); let leader = ask_leader.ask_leader().await?; info!("Cluster client updated to new leader addr: {leader}"); @@ -164,6 +166,7 @@ impl Inner { Err(status) => { // The leader may be unreachable. if util::is_unreachable(&status) { + last_error = Some(status.to_string()); warn!("Failed to {task} to {leader}, source: {status}"); let leader = ask_leader.ask_leader().await?; info!("Cluster client updated to new leader addr: {leader}"); @@ -180,7 +183,7 @@ impl Inner { } RetryTimesExceededSnafu { - msg: "Failed to {task}", + msg: format!("Failed to {task}, last error: {:?}", last_error), times: self.max_retry, } .fail() diff --git a/src/meta-client/src/client/procedure.rs b/src/meta-client/src/client/procedure.rs index 20cd5385a872..32049dbabdf0 100644 --- a/src/meta-client/src/client/procedure.rs +++ b/src/meta-client/src/client/procedure.rs @@ -162,6 +162,7 @@ impl Inner { { let ask_leader = self.ask_leader()?; let mut times = 0; + let mut last_error = None; while times < self.max_retry { if let Some(leader) = &ask_leader.get_leader() { @@ -169,6 +170,7 @@ impl Inner { match body_fn(client).await { Ok(res) => { if util::is_not_leader(get_header(&res)) { + last_error = Some(format!("{leader} is not a leader")); warn!("Failed to {task} to {leader}, not a leader"); let leader = ask_leader.ask_leader().await?; info!("DDL client updated to new leader addr: {leader}"); @@ -180,6 +182,7 @@ impl Inner { Err(status) => { // The leader may be unreachable. if util::is_unreachable(&status) { + last_error = Some(status.to_string()); warn!("Failed to {task} to {leader}, source: {status}"); let leader = ask_leader.ask_leader().await?; info!("Procedure client updated to new leader addr: {leader}"); @@ -196,7 +199,7 @@ impl Inner { } error::RetryTimesExceededSnafu { - msg: "Failed to {task}", + msg: format!("Failed to {task}, last error: {:?}", last_error), times: self.max_retry, } .fail() diff --git a/src/meta-srv/Cargo.toml b/src/meta-srv/Cargo.toml index 137e45f57a6a..28468247de60 100644 --- a/src/meta-srv/Cargo.toml +++ b/src/meta-srv/Cargo.toml @@ -25,6 +25,7 @@ common-procedure.workspace = true common-runtime.workspace = true common-telemetry.workspace = true common-time.workspace = true +common-version.workspace = true common-wal.workspace = true dashmap.workspace = true datatypes.workspace = true diff --git a/src/meta-srv/src/handler/collect_cluster_info_handler.rs b/src/meta-srv/src/handler/collect_cluster_info_handler.rs index 48edc4504075..db4ffb9c9984 100644 --- a/src/meta-srv/src/handler/collect_cluster_info_handler.rs +++ b/src/meta-srv/src/handler/collect_cluster_info_handler.rs @@ -44,10 +44,13 @@ impl HeartbeatHandler for CollectFrontendClusterInfoHandler { return Ok(HandleControl::Continue); }; + let build_info = common_version::build_info(); let value = NodeInfo { peer, last_activity_ts: common_time::util::current_time_millis(), status: NodeStatus::Frontend(FrontendStatus {}), + version: build_info.version.to_string(), + git_commit: build_info.commit.to_string(), }; save_to_mem_store(key, value, ctx).await?; @@ -86,6 +89,8 @@ impl HeartbeatHandler for CollectDatanodeClusterInfoHandler { .count(); let follower_regions = stat.region_stats.len() - leader_regions; + let build_info = common_version::build_info(); + let value = NodeInfo { peer, last_activity_ts: stat.timestamp_millis, @@ -95,6 +100,8 @@ impl HeartbeatHandler for CollectDatanodeClusterInfoHandler { leader_regions, follower_regions, }), + version: build_info.version.to_string(), + git_commit: build_info.commit.to_string(), }; save_to_mem_store(key, value, ctx).await?; diff --git a/tests/cases/distributed/information_schema/cluster_info.result b/tests/cases/distributed/information_schema/cluster_info.result new file mode 100644 index 000000000000..4492b92801ef --- /dev/null +++ b/tests/cases/distributed/information_schema/cluster_info.result @@ -0,0 +1,35 @@ +USE INFORMATION_SCHEMA; + +Affected Rows: 0 + +DESC TABLE CLUSTER_INFO; + ++------------+--------+-----+------+---------+---------------+ +| Column | Type | Key | Null | Default | Semantic Type | ++------------+--------+-----+------+---------+---------------+ +| peer_id | UInt64 | | NO | | FIELD | +| peer_type | String | | NO | | FIELD | +| peer_addr | String | | YES | | FIELD | +| version | String | | NO | | FIELD | +| git_commit | String | | NO | | FIELD | ++------------+--------+-----+------+---------+---------------+ + +SELECT * FROM CLUSTER_INFO; + +Error: 3001(EngineExecuteQuery), Internal error: Failed to list nodes in cluster: Retry exceeded max times(3), message: Failed to get_metasrv_peers, last error: Some("127.0.0.1:3002 is not a leader"). +This was likely caused by a bug in DataFusion's code and we would welcome that you file an bug report in our issue tracker + +SELECT * FROM CLUSTER_INFO WHERE PEER_TYPE = 'METASRV'; + +Error: 3001(EngineExecuteQuery), Internal error: Failed to list nodes in cluster: Retry exceeded max times(3), message: Failed to get_metasrv_peers, last error: Some("127.0.0.1:3002 is not a leader"). +This was likely caused by a bug in DataFusion's code and we would welcome that you file an bug report in our issue tracker + +SELECT * FROM CLUSTER_INFO WHERE PEER_ID > 1; + +Error: 3001(EngineExecuteQuery), Internal error: Failed to list nodes in cluster: Retry exceeded max times(3), message: Failed to get_metasrv_peers, last error: Some("127.0.0.1:3002 is not a leader"). +This was likely caused by a bug in DataFusion's code and we would welcome that you file an bug report in our issue tracker + +USE PUBLIC; + +Affected Rows: 0 + diff --git a/tests/cases/distributed/information_schema/cluster_info.sql b/tests/cases/distributed/information_schema/cluster_info.sql new file mode 100644 index 000000000000..9a129ccdef91 --- /dev/null +++ b/tests/cases/distributed/information_schema/cluster_info.sql @@ -0,0 +1,11 @@ +USE INFORMATION_SCHEMA; + +DESC TABLE CLUSTER_INFO; + +SELECT * FROM CLUSTER_INFO; + +SELECT * FROM CLUSTER_INFO WHERE PEER_TYPE = 'METASRV'; + +SELECT * FROM CLUSTER_INFO WHERE PEER_ID > 1; + +USE PUBLIC; diff --git a/tests/cases/standalone/information_schema/cluster_info.result b/tests/cases/standalone/information_schema/cluster_info.result new file mode 100644 index 000000000000..b2a73f48b9ad --- /dev/null +++ b/tests/cases/standalone/information_schema/cluster_info.result @@ -0,0 +1,54 @@ +USE INFORMATION_SCHEMA; + +Affected Rows: 0 + +DESC TABLE CLUSTER_INFO; + ++------------+--------+-----+------+---------+---------------+ +| Column | Type | Key | Null | Default | Semantic Type | ++------------+--------+-----+------+---------+---------------+ +| peer_id | UInt64 | | NO | | FIELD | +| peer_type | String | | NO | | FIELD | +| peer_addr | String | | YES | | FIELD | +| version | String | | NO | | FIELD | +| git_commit | String | | NO | | FIELD | ++------------+--------+-----+------+---------+---------------+ + +SELECT * FROM CLUSTER_INFO; + ++---------+------------+-----------+---------+------------------------------------------+ +| peer_id | peer_type | peer_addr | version | git_commit | ++---------+------------+-----------+---------+------------------------------------------+ +| 0 | STANDALONE | | 0.7.2 | c67f97aed07d2938b41ca9559af5bff48ff3f864 | ++---------+------------+-----------+---------+------------------------------------------+ + +SELECT * FROM CLUSTER_INFO WHERE PEER_TYPE = 'STANDALONE'; + ++---------+------------+-----------+---------+------------------------------------------+ +| peer_id | peer_type | peer_addr | version | git_commit | ++---------+------------+-----------+---------+------------------------------------------+ +| 0 | STANDALONE | | 0.7.2 | c67f97aed07d2938b41ca9559af5bff48ff3f864 | ++---------+------------+-----------+---------+------------------------------------------+ + +SELECT * FROM CLUSTER_INFO WHERE PEER_TYPE != 'STANDALONE'; + +++ +++ + +SELECT * FROM CLUSTER_INFO WHERE PEER_ID = 0; + ++---------+------------+-----------+---------+------------------------------------------+ +| peer_id | peer_type | peer_addr | version | git_commit | ++---------+------------+-----------+---------+------------------------------------------+ +| 0 | STANDALONE | | 0.7.2 | c67f97aed07d2938b41ca9559af5bff48ff3f864 | ++---------+------------+-----------+---------+------------------------------------------+ + +SELECT * FROM CLUSTER_INFO WHERE PEER_ID > 0; + +++ +++ + +USE PUBLIC; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/information_schema/cluster_info.sql b/tests/cases/standalone/information_schema/cluster_info.sql new file mode 100644 index 000000000000..725038dc2419 --- /dev/null +++ b/tests/cases/standalone/information_schema/cluster_info.sql @@ -0,0 +1,15 @@ +USE INFORMATION_SCHEMA; + +DESC TABLE CLUSTER_INFO; + +SELECT * FROM CLUSTER_INFO; + +SELECT * FROM CLUSTER_INFO WHERE PEER_TYPE = 'STANDALONE'; + +SELECT * FROM CLUSTER_INFO WHERE PEER_TYPE != 'STANDALONE'; + +SELECT * FROM CLUSTER_INFO WHERE PEER_ID = 0; + +SELECT * FROM CLUSTER_INFO WHERE PEER_ID > 0; + +USE PUBLIC;