diff --git a/e2e_test/batch/explain.slt b/e2e_test/batch/explain.slt
index c3f4bfaf32f0e..1f9cebdf1b68b 100644
--- a/e2e_test/batch/explain.slt
+++ b/e2e_test/batch/explain.slt
@@ -11,4 +11,23 @@ statement ok
 drop table t;
 
 statement error Not supported: EXPLAIN CREATE VIEW
-explain create view v as select 1;
\ No newline at end of file
+explain create view v as select 1;
+
+query error
+explain (trace, format json) select 1;
+----
+    db error: ERROR: Failed to run the query
+
+Caused by:
+  Not supported: EXPLAIN (TRACE, JSON FORMAT)
+HINT: Only EXPLAIN (LOGICAL | PHYSICAL, JSON FORMAT) is supported.
+
+
+query error
+explain (distsql, format json) select 1;
+----
+    db error: ERROR: Failed to run the query
+
+Caused by:
+  Not supported: EXPLAIN (DISTSQL, JSON FORMAT)
+HINT: Only EXPLAIN (LOGICAL | PHYSICAL, JSON FORMAT) is supported.
\ No newline at end of file
diff --git a/src/frontend/planner_test/tests/testdata/input/explain_json_format.yaml b/src/frontend/planner_test/tests/testdata/input/explain_json_format.yaml
new file mode 100644
index 0000000000000..c3dfe86b1c61a
--- /dev/null
+++ b/src/frontend/planner_test/tests/testdata/input/explain_json_format.yaml
@@ -0,0 +1,18 @@
+- name: test json output format (logical)
+  sql: |
+    CREATE TABLE t (v1 int);
+    explain (logical, format json) SELECT approx_percentile(0.5) WITHIN GROUP (order by v1) from t;
+  expected_outputs:
+  - explain_output
+- name: test json output format (batch)
+  sql: |
+    CREATE TABLE t (v1 int);
+    explain (physical, format json) SELECT approx_percentile(0.5) WITHIN GROUP (order by v1) from t;
+  expected_outputs:
+  - explain_output
+- name: test json output format (stream)
+  sql: |
+    CREATE TABLE t (v1 int);
+    explain (physical, format json) create materialized view m1 as SELECT approx_percentile(0.5) WITHIN GROUP (order by v1) from t;
+  expected_outputs:
+  - explain_output
\ No newline at end of file
diff --git a/src/frontend/planner_test/tests/testdata/output/explain_json_format.yaml b/src/frontend/planner_test/tests/testdata/output/explain_json_format.yaml
new file mode 100644
index 0000000000000..9017c0609c253
--- /dev/null
+++ b/src/frontend/planner_test/tests/testdata/output/explain_json_format.yaml
@@ -0,0 +1,147 @@
+# This file is automatically generated. See `src/frontend/planner_test/README.md` for more information.
+- name: test json output format (logical)
+  sql: |
+    CREATE TABLE t (v1 int);
+    explain (logical, format json) SELECT approx_percentile(0.5) WITHIN GROUP (order by v1) from t;
+  explain_output: |
+    {
+      "name": "LogicalAgg",
+      "fields": {
+        "aggs": [
+          "approx_percentile($expr1)"
+        ]
+      },
+      "children": [
+        {
+          "name": "LogicalProject",
+          "fields": {
+            "exprs": [
+              "t.v1::Float64 as $expr1"
+            ]
+          },
+          "children": [
+            {
+              "name": "LogicalScan",
+              "fields": {
+                "columns": [
+                  "v1"
+                ],
+                "table": "t"
+              },
+              "children": []
+            }
+          ]
+        }
+      ]
+    }
+- name: test json output format (batch)
+  sql: |
+    CREATE TABLE t (v1 int);
+    explain (physical, format json) SELECT approx_percentile(0.5) WITHIN GROUP (order by v1) from t;
+  explain_output: |
+    {
+      "name": "BatchSimpleAgg",
+      "fields": {
+        "aggs": [
+          "approx_percentile($expr1)"
+        ]
+      },
+      "children": [
+        {
+          "name": "BatchExchange",
+          "fields": {
+            "dist": "Single",
+            "order": []
+          },
+          "children": [
+            {
+              "name": "BatchProject",
+              "fields": {
+                "exprs": [
+                  "t.v1::Float64 as $expr1"
+                ]
+              },
+              "children": [
+                {
+                  "name": "BatchScan",
+                  "fields": {
+                    "columns": [
+                      "v1"
+                    ],
+                    "table": "t"
+                  },
+                  "children": []
+                }
+              ]
+            }
+          ]
+        }
+      ]
+    }
+- name: test json output format (stream)
+  sql: |
+    CREATE TABLE t (v1 int);
+    explain (physical, format json) create materialized view m1 as SELECT approx_percentile(0.5) WITHIN GROUP (order by v1) from t;
+  explain_output: |
+    {
+      "name": "StreamMaterialize",
+      "fields": {
+        "columns": [
+          "approx_percentile"
+        ],
+        "pk_columns": [],
+        "pk_conflict": "NoCheck",
+        "stream_key": []
+      },
+      "children": [
+        {
+          "name": "StreamGlobalApproxPercentile",
+          "fields": {
+            "quantile": "0.5:Float64",
+            "relative_error": "0.01:Float64"
+          },
+          "children": [
+            {
+              "name": "StreamExchange",
+              "fields": {
+                "dist": "Single"
+              },
+              "children": [
+                {
+                  "name": "StreamLocalApproxPercentile",
+                  "fields": {
+                    "percentile_col": "$expr1",
+                    "quantile": "0.5:Float64",
+                    "relative_error": "0.01:Float64"
+                  },
+                  "children": [
+                    {
+                      "name": "StreamProject",
+                      "fields": {
+                        "exprs": [
+                          "t.v1::Float64 as $expr1",
+                          "t._row_id"
+                        ]
+                      },
+                      "children": [
+                        {
+                          "name": "StreamTableScan",
+                          "fields": {
+                            "columns": [
+                              "v1",
+                              "_row_id"
+                            ],
+                            "table": "t"
+                          },
+                          "children": []
+                        }
+                      ]
+                    }
+                  ]
+                }
+              ]
+            }
+          ]
+        }
+      ]
+    }
diff --git a/src/frontend/src/handler/explain.rs b/src/frontend/src/handler/explain.rs
index ed22462a66888..6f2569e83fb26 100644
--- a/src/frontend/src/handler/explain.rs
+++ b/src/frontend/src/handler/explain.rs
@@ -16,7 +16,7 @@ use pgwire::pg_response::{PgResponse, StatementType};
 use risingwave_batch::worker_manager::worker_node_manager::WorkerNodeSelector;
 use risingwave_common::bail_not_implemented;
 use risingwave_common::types::Fields;
-use risingwave_sqlparser::ast::{ExplainOptions, ExplainType, Statement};
+use risingwave_sqlparser::ast::{ExplainFormat, ExplainOptions, ExplainType, Statement};
 use thiserror_ext::AsReport;
 
 use super::create_index::{gen_create_index_plan, resolve_index_schema};
@@ -179,6 +179,7 @@ async fn do_handle_explain(
     let explain_trace = context.is_explain_trace();
     let explain_verbose = context.is_explain_verbose();
     let explain_type = context.explain_type();
+    let explain_format = context.explain_format();
 
     if explain_trace {
         let trace = context.take_trace();
@@ -212,7 +213,10 @@ async fn do_handle_explain(
             ExplainType::Physical => {
                 // if explain trace is on, the plan has been in the rows
                 if !explain_trace && let Ok(plan) = &plan {
-                    blocks.push(plan.explain_to_string());
+                    match explain_format {
+                        ExplainFormat::Text => blocks.push(plan.explain_to_string()),
+                        ExplainFormat::Json => blocks.push(plan.explain_to_json()),
+                    }
                 }
             }
             ExplainType::Logical => {
@@ -248,6 +252,21 @@ pub async fn handle_explain(
     if analyze {
         bail_not_implemented!(issue = 4856, "explain analyze");
    }
+    if options.trace && options.explain_format == ExplainFormat::Json {
+        return Err(ErrorCode::NotSupported(
+            "EXPLAIN (TRACE, JSON FORMAT)".to_string(),
+            "Only EXPLAIN (LOGICAL | PHYSICAL, JSON FORMAT) is supported.".to_string(),
+        )
+        .into());
+    }
+    if options.explain_type == ExplainType::DistSql && options.explain_format == ExplainFormat::Json
+    {
+        return Err(ErrorCode::NotSupported(
+            "EXPLAIN (DISTSQL, JSON FORMAT)".to_string(),
+            "Only EXPLAIN (LOGICAL | PHYSICAL, JSON FORMAT) is supported.".to_string(),
+        )
+        .into());
+    }
 
     let mut blocks = Vec::new();
     let result = do_handle_explain(handler_args, options.clone(), stmt, &mut blocks).await;
diff --git a/src/frontend/src/optimizer/logical_optimization.rs b/src/frontend/src/optimizer/logical_optimization.rs
index a03bb95a0d51e..8a508c577a6fe 100644
--- a/src/frontend/src/optimizer/logical_optimization.rs
+++ b/src/frontend/src/optimizer/logical_optimization.rs
@@ -108,6 +108,8 @@ impl OptimizationStage {
 
 use std::sync::LazyLock;
 
+use risingwave_sqlparser::ast::ExplainFormat;
+
 pub struct LogicalOptimizer {}
 
 static DAG_TO_TREE: LazyLock<OptimizationStage> = LazyLock::new(|| {
@@ -684,7 +686,14 @@ impl LogicalOptimizer {
         InputRefValidator.validate(plan.clone());
 
         if ctx.is_explain_logical() {
-            ctx.store_logical(plan.explain_to_string());
+            match ctx.explain_format() {
+                ExplainFormat::Text => {
+                    ctx.store_logical(plan.explain_to_string());
+                }
+                ExplainFormat::Json => {
+                    ctx.store_logical(plan.explain_to_json());
+                }
+            }
         }
 
         Ok(plan)
@@ -789,7 +798,14 @@ impl LogicalOptimizer {
         InputRefValidator.validate(plan.clone());
 
         if ctx.is_explain_logical() {
-            ctx.store_logical(plan.explain_to_string());
+            match ctx.explain_format() {
+                ExplainFormat::Text => {
+                    ctx.store_logical(plan.explain_to_string());
+                }
+                ExplainFormat::Json => {
+                    ctx.store_logical(plan.explain_to_json());
+                }
+            }
         }
 
         Ok(plan)
diff --git a/src/frontend/src/optimizer/optimizer_context.rs b/src/frontend/src/optimizer/optimizer_context.rs
index 35c5a3ee9129b..75af6a3b3da69 100644
--- a/src/frontend/src/optimizer/optimizer_context.rs
+++ b/src/frontend/src/optimizer/optimizer_context.rs
@@ -19,7 +19,7 @@ use std::marker::PhantomData;
 use std::rc::Rc;
 use std::sync::Arc;
 
-use risingwave_sqlparser::ast::{ExplainOptions, ExplainType};
+use risingwave_sqlparser::ast::{ExplainFormat, ExplainOptions, ExplainType};
 
 use crate::binder::ShareId;
 use crate::expr::{CorrelatedId, SessionTimezone};
@@ -176,6 +176,10 @@ impl OptimizerContext {
         self.explain_options.explain_type.clone()
     }
 
+    pub fn explain_format(&self) -> ExplainFormat {
+        self.explain_options.explain_format.clone()
+    }
+
     pub fn is_explain_logical(&self) -> bool {
         self.explain_type() == ExplainType::Logical
     }
diff --git a/src/frontend/src/optimizer/plan_node/mod.rs b/src/frontend/src/optimizer/plan_node/mod.rs
index cfcc4f0709569..9b814ab8289c2 100644
--- a/src/frontend/src/optimizer/plan_node/mod.rs
+++ b/src/frontend/src/optimizer/plan_node/mod.rs
@@ -53,6 +53,7 @@ use super::property::{Distribution, FunctionalDependencySet, MonotonicityMap, Or
 use crate::error::{ErrorCode, Result};
 use crate::optimizer::ExpressionSimplifyRewriter;
 use crate::session::current::notice_to_user;
+use crate::utils::PrettySerde;
 
 /// A marker trait for different conventions, used for enforcing type safety.
 ///
@@ -643,6 +644,9 @@ pub trait Explain {
 
     /// Explain the plan node and return a string.
     fn explain_to_string(&self) -> String;
+
+    /// Explain the plan node and return a json string.
+    fn explain_to_json(&self) -> String;
 }
 
 impl Explain for PlanRef {
@@ -665,6 +669,14 @@ impl Explain for PlanRef {
         config.unicode(&mut output, &plan.explain());
         output
     }
+
+    /// Explain the plan node and return a json string.
+    fn explain_to_json(&self) -> String {
+        let plan = reorganize_elements_id(self.clone());
+        let explain_ir = plan.explain();
+        serde_json::to_string_pretty(&PrettySerde(explain_ir))
+            .expect("failed to serialize plan to json")
+    }
 }
 
 pub(crate) fn pretty_config() -> PrettyConfig {
diff --git a/src/frontend/src/utils/mod.rs b/src/frontend/src/utils/mod.rs
index ca2a90e4e38e7..c1556f30b1caf 100644
--- a/src/frontend/src/utils/mod.rs
+++ b/src/frontend/src/utils/mod.rs
@@ -12,6 +12,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+mod pretty_serde;
+pub use pretty_serde::PrettySerde;
 mod column_index_mapping;
 use std::any::Any;
 use std::hash::{Hash, Hasher};
diff --git a/src/frontend/src/utils/pretty_serde.rs b/src/frontend/src/utils/pretty_serde.rs
new file mode 100644
index 0000000000000..37959ce3eb975
--- /dev/null
+++ b/src/frontend/src/utils/pretty_serde.rs
@@ -0,0 +1,104 @@
+// Copyright 2024 RisingWave Labs
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! @kwannoel:
+//! This module implements Serde for the Pretty struct. Why not implement it directly on our plan nodes?
+//! That's because Pretty already summarizes the fields that are important to us.
+//! You can see that when `explain()` is called, we directly return the `Pretty` struct.
+//! The _proper way_ to do this would be to create a new data structure that plan nodes get converted into,
+//! and then implement `Serialize` and `Deserialize` on that data structure (including to `Pretty`).
+//! But that's a lot of refactoring work.
+//! So we just wrap `Pretty` in a newtype and implement `Serialize` on that,
+//! since it's a good enough intermediate representation.
+
+use std::collections::BTreeMap;
+
+use pretty_xmlish::Pretty;
+use serde::ser::{SerializeSeq, SerializeStruct};
+use serde::{Serialize, Serializer};
+
+pub struct PrettySerde<'a>(pub Pretty<'a>);
+
+impl<'a> Serialize for PrettySerde<'a> {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: Serializer,
+    {
+        match &self.0 {
+            Pretty::Text(text) => serializer.serialize_str(text.as_ref()),
+
+            Pretty::Record(node) => {
+                let mut state = serializer.serialize_struct("XmlNode", 3)?;
+                state.serialize_field("name", node.name.as_ref())?;
+                state.serialize_field(
+                    "fields",
+                    &node
+                        .fields
+                        .iter()
+                        .map(|(k, v)| (k.as_ref(), PrettySerde(v.clone())))
+                        .collect::<BTreeMap<_, _>>(),
+                )?;
+                state.serialize_field(
+                    "children",
+                    &node
+                        .children
+                        .iter()
+                        .map(|c| PrettySerde(c.clone()))
+                        .collect::<Vec<_>>(),
+                )?;
+                state.end()
+            }
+
+            Pretty::Array(elements) => {
+                let mut seq = serializer.serialize_seq(Some(elements.len()))?;
+                for element in elements {
+                    seq.serialize_element(&PrettySerde((*element).clone()))?;
+                }
+                seq.end()
+            }
+
+            Pretty::Linearized(inner, size) => {
+                let mut state = serializer.serialize_struct("Linearized", 2)?;
+                state.serialize_field("inner", &PrettySerde((**inner).clone()))?;
+                state.serialize_field("size", size)?;
+                state.end()
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::fmt::Debug;
+
+    use expect_test::{expect, Expect};
+
+    use super::*;
+
+    fn check(actual: impl Debug, expect: Expect) {
+        let actual = format!("{:#?}", actual);
+        expect.assert_eq(&actual);
+    }
+
+    #[test]
+    fn test_pretty_serde() {
+        let pretty = Pretty::childless_record("root", vec![("a", Pretty::Text("1".into()))]);
+        let pretty_serde = PrettySerde(pretty);
+        let serialized = serde_json::to_string(&pretty_serde).unwrap();
+        check(
+            serialized,
+            expect![[r#""{\"name\":\"root\",\"fields\":{\"a\":\"1\"},\"children\":[]}""#]],
+        );
+    }
+}
diff --git a/src/sqlparser/src/ast/mod.rs b/src/sqlparser/src/ast/mod.rs
index 80353231788a4..5dcea9c339d87 100644
--- a/src/sqlparser/src/ast/mod.rs
+++ b/src/sqlparser/src/ast/mod.rs
@@ -1134,6 +1134,22 @@ impl fmt::Display for ExplainType {
     }
 }
 
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+pub enum ExplainFormat {
+    Text,
+    Json,
+}
+
+impl fmt::Display for ExplainFormat {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            ExplainFormat::Text => f.write_str("TEXT"),
+            ExplainFormat::Json => f.write_str("JSON"),
+        }
+    }
+}
+
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 pub struct ExplainOptions {
@@ -1143,6 +1159,8 @@ pub struct ExplainOptions {
     pub trace: bool,
     // explain's plan type
     pub explain_type: ExplainType,
+    // explain's plan format
+    pub explain_format: ExplainFormat,
 }
 
 impl Default for ExplainOptions {
@@ -1151,6 +1169,7 @@ impl Default for ExplainOptions {
             verbose: false,
             trace: false,
             explain_type: ExplainType::Physical,
+            explain_format: ExplainFormat::Text,
         }
     }
 }
@@ -1171,6 +1190,9 @@ impl fmt::Display for ExplainOptions {
         if self.explain_type == default.explain_type {
             option_strs.push(self.explain_type.to_string());
         }
+        if self.explain_format == default.explain_format {
+            option_strs.push(self.explain_format.to_string());
+        }
         write!(f, "{}", option_strs.iter().format(","))
     }
 }
diff --git a/src/sqlparser/src/parser.rs b/src/sqlparser/src/parser.rs
index 14c19b829bb7c..4874e5320056d 100644
--- a/src/sqlparser/src/parser.rs
+++ b/src/sqlparser/src/parser.rs
@@ -4015,6 +4015,7 @@ impl Parser<'_> {
             Keyword::LOGICAL,
             Keyword::PHYSICAL,
             Keyword::DISTSQL,
+            Keyword::FORMAT,
         ];
 
         let parse_explain_option = |parser: &mut Parser<'_>| -> PResult<()> {
@@ -4038,6 +4039,15 @@ impl Parser<'_> {
                 Keyword::LOGICAL => options.explain_type = ExplainType::Logical,
                 Keyword::PHYSICAL => options.explain_type = ExplainType::Physical,
                 Keyword::DISTSQL => options.explain_type = ExplainType::DistSql,
+                Keyword::FORMAT => {
+                    options.explain_format = {
+                        match parser.expect_one_of_keywords(&[Keyword::TEXT, Keyword::JSON])? {
+                            Keyword::TEXT => ExplainFormat::Text,
+                            Keyword::JSON => ExplainFormat::Json,
+                            _ => unreachable!("{}", keyword),
+                        }
+                    }
+                }
                 _ => unreachable!("{}", keyword),
             };
             Ok(())
diff --git a/src/sqlparser/tests/sqlparser_common.rs b/src/sqlparser/tests/sqlparser_common.rs
index e6b29d632fe25..c8f6fb41d32a9 100644
--- a/src/sqlparser/tests/sqlparser_common.rs
+++ b/src/sqlparser/tests/sqlparser_common.rs
@@ -1858,6 +1858,7 @@ fn parse_explain_analyze_with_simple_select() {
             trace: true,
             verbose: true,
             explain_type: ExplainType::DistSql,
+            explain_format: ExplainFormat::Text,
         },
     );
     run_explain_analyze(
@@ -1867,6 +1868,7 @@ fn parse_explain_analyze_with_simple_select() {
             trace: false,
             verbose: true,
             explain_type: ExplainType::DistSql,
+            explain_format: ExplainFormat::Text,
         },
     );
     run_explain_analyze(
@@ -1876,6 +1878,17 @@ fn parse_explain_analyze_with_simple_select() {
             trace: false,
             verbose: true,
             explain_type: ExplainType::DistSql,
+            explain_format: ExplainFormat::Text,
+        },
+    );
+    run_explain_analyze(
+        "EXPLAIN (LOGICAL, FORMAT JSON) SELECT sqrt(id) FROM foo",
+        false,
+        ExplainOptions {
+            trace: false,
+            verbose: false,
+            explain_type: ExplainType::Logical,
+            explain_format: ExplainFormat::Json,
         },
     );
 }
@@ -1893,9 +1906,15 @@ fn parse_explain_with_invalid_options() {
 
     let res = parse_sql_statements("EXPLAIN (VERBOSE, ) SELECT sqrt(id) FROM foo");
 
-    let err_msg =
-        "expected one of VERBOSE or TRACE or TYPE or LOGICAL or PHYSICAL or DISTSQL, found: )";
-    assert!(format!("{}", res.unwrap_err()).contains(err_msg));
+    let expected =
+        "expected one of VERBOSE or TRACE or TYPE or LOGICAL or PHYSICAL or DISTSQL or FORMAT, found: )";
+    let actual = res.unwrap_err().to_string();
+    assert!(
+        actual.contains(expected),
+        "expected: {:?}\nactual: {:?}",
+        expected,
+        actual
+    );
 }
 
 #[test]
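
For reference, a minimal sketch of the surface behavior this patch adds, drawn from the slt and planner tests above. The table and materialized view names are illustrative stand-ins; only the supported and rejected option combinations come from the code in this diff.

CREATE TABLE t (v1 int);

-- Supported: logical and physical plans can be rendered as JSON, for both queries
-- and CREATE MATERIALIZED VIEW statements.
EXPLAIN (LOGICAL, FORMAT JSON) SELECT approx_percentile(0.5) WITHIN GROUP (ORDER BY v1) FROM t;
EXPLAIN (PHYSICAL, FORMAT JSON) SELECT approx_percentile(0.5) WITHIN GROUP (ORDER BY v1) FROM t;
EXPLAIN (PHYSICAL, FORMAT JSON) CREATE MATERIALIZED VIEW m1 AS SELECT approx_percentile(0.5) WITHIN GROUP (ORDER BY v1) FROM t;

-- Rejected: TRACE and DISTSQL do not support FORMAT JSON and return a NotSupported error.
EXPLAIN (TRACE, FORMAT JSON) SELECT 1;
EXPLAIN (DISTSQL, FORMAT JSON) SELECT 1;

-- FORMAT TEXT remains the default, so existing EXPLAIN output is unchanged.
EXPLAIN SELECT 1;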