Skip to content

Commit

Permalink
fix: unparser generates wrong sql for derived table with columns (#17) (
Browse files Browse the repository at this point in the history
apache#11505)

* fix unparser for derived table with columns

* refactoring

* renaming

* case in tests
  • Loading branch information
y-f-u authored and wiedld committed Jul 31, 2024
1 parent 7e8224b commit adcf6f4
Show file tree
Hide file tree
Showing 2 changed files with 96 additions and 10 deletions.
77 changes: 67 additions & 10 deletions datafusion/sql/src/unparser/plan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use datafusion_common::{internal_err, not_impl_err, plan_err, DataFusionError, R
use datafusion_expr::{
expr::Alias, Distinct, Expr, JoinConstraint, JoinType, LogicalPlan, Projection,
};
use sqlparser::ast::{self, SetExpr};
use sqlparser::ast::{self, Ident, SetExpr};

use crate::unparser::utils::unproject_agg_exprs;

Expand Down Expand Up @@ -457,15 +457,11 @@ impl Unparser<'_> {
}
LogicalPlan::SubqueryAlias(plan_alias) => {
// Handle bottom-up to allocate relation
self.select_to_sql_recursively(
plan_alias.input.as_ref(),
query,
select,
relation,
)?;
let (plan, columns) = subquery_alias_inner_query_and_columns(plan_alias);

self.select_to_sql_recursively(plan, query, select, relation)?;
relation.alias(Some(
self.new_table_alias(plan_alias.alias.table().to_string()),
self.new_table_alias(plan_alias.alias.table().to_string(), columns),
));

Ok(())
Expand Down Expand Up @@ -599,10 +595,10 @@ impl Unparser<'_> {
self.binary_op_to_sql(lhs, rhs, ast::BinaryOperator::And)
}

fn new_table_alias(&self, alias: String) -> ast::TableAlias {
fn new_table_alias(&self, alias: String, columns: Vec<Ident>) -> ast::TableAlias {
ast::TableAlias {
name: self.new_ident_quoted_if_needs(alias),
columns: Vec::new(),
columns,
}
}

Expand All @@ -611,6 +607,67 @@ impl Unparser<'_> {
}
}

// This logic is to work out the columns and inner query for SubqueryAlias plan for both types of
// subquery
// - `(SELECT column_a as a from table) AS A`
// - `(SELECT column_a from table) AS A (a)`
//
// A roundtrip example for table alias with columns
//
// query: SELECT id FROM (SELECT j1_id from j1) AS c (id)
//
// LogicPlan:
// Projection: c.id
// SubqueryAlias: c
// Projection: j1.j1_id AS id
// Projection: j1.j1_id
// TableScan: j1
//
// Before introducing this logic, the unparsed query would be `SELECT c.id FROM (SELECT j1.j1_id AS
// id FROM (SELECT j1.j1_id FROM j1)) AS c`.
// The query is invalid as `j1.j1_id` is not a valid identifier in the derived table
// `(SELECT j1.j1_id FROM j1)`
//
// With this logic, the unparsed query will be:
// `SELECT c.id FROM (SELECT j1.j1_id FROM j1) AS c (id)`
//
// Caveat: this won't handle the case like `select * from (select 1, 2) AS a (b, c)`
// as the parser gives a wrong plan which has mismatch `Int(1)` types: Literal and
// Column in the Projections. Once the parser side is fixed, this logic should work
fn subquery_alias_inner_query_and_columns(
subquery_alias: &datafusion_expr::SubqueryAlias,
) -> (&LogicalPlan, Vec<Ident>) {
let plan: &LogicalPlan = subquery_alias.input.as_ref();

let LogicalPlan::Projection(outer_projections) = plan else {
return (plan, vec![]);
};

// check if it's projection inside projection
let LogicalPlan::Projection(inner_projection) = outer_projections.input.as_ref()
else {
return (plan, vec![]);
};

let mut columns: Vec<Ident> = vec![];
// check if the inner projection and outer projection have a matching pattern like
// Projection: j1.j1_id AS id
// Projection: j1.j1_id
for (i, inner_expr) in inner_projection.expr.iter().enumerate() {
let Expr::Alias(ref outer_alias) = &outer_projections.expr[i] else {
return (plan, vec![]);
};

if outer_alias.expr.as_ref() != inner_expr {
return (plan, vec![]);
};

columns.push(outer_alias.name.as_str().into());
}

(outer_projections.input.as_ref(), columns)
}

impl From<BuilderError> for DataFusionError {
fn from(e: BuilderError) -> Self {
DataFusionError::External(Box::new(e))
Expand Down
29 changes: 29 additions & 0 deletions datafusion/sql/tests/cases/plan_to_sql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,35 @@ fn roundtrip_statement_with_dialect() -> Result<()> {
parser_dialect: Box::new(GenericDialect {}),
unparser_dialect: Box::new(UnparserDefaultDialect {}),
},
// more tests around subquery/derived table roundtrip
TestStatementWithDialect {
sql: "SELECT string_count FROM (
SELECT
j1_id,
MIN(j2_string)
FROM
j1 LEFT OUTER JOIN j2 ON
j1_id = j2_id
GROUP BY
j1_id
) AS agg (id, string_count)
",
expected: r#"SELECT agg.string_count FROM (SELECT j1.j1_id, MIN(j2.j2_string) FROM j1 LEFT JOIN j2 ON (j1.j1_id = j2.j2_id) GROUP BY j1.j1_id) AS agg (id, string_count)"#,
parser_dialect: Box::new(GenericDialect {}),
unparser_dialect: Box::new(UnparserDefaultDialect {}),
},
TestStatementWithDialect {
sql: "SELECT id FROM (SELECT j1_id from j1) AS c (id)",
expected: r#"SELECT c.id FROM (SELECT j1.j1_id FROM j1) AS c (id)"#,
parser_dialect: Box::new(GenericDialect {}),
unparser_dialect: Box::new(UnparserDefaultDialect {}),
},
TestStatementWithDialect {
sql: "SELECT id FROM (SELECT j1_id as id from j1) AS c",
expected: r#"SELECT c.id FROM (SELECT j1.j1_id AS id FROM j1) AS c"#,
parser_dialect: Box::new(GenericDialect {}),
unparser_dialect: Box::new(UnparserDefaultDialect {}),
},
];

for query in tests {
Expand Down

0 comments on commit adcf6f4

Please sign in to comment.