Skip to content

Commit

Permalink
minor(13525): validate union after each optimizer passes
Browse files Browse the repository at this point in the history
  • Loading branch information
wiedld committed Dec 4, 2024
1 parent a855811 commit 0163a40
Showing 1 changed file with 27 additions and 0 deletions.
27 changes: 27 additions & 0 deletions datafusion/optimizer/src/optimizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ use std::sync::Arc;

use chrono::{DateTime, Utc};
use datafusion_expr::registry::FunctionRegistry;
use datafusion_expr::Union;
use log::{debug, warn};

use datafusion_common::alias::AliasGenerator;
Expand Down Expand Up @@ -482,6 +483,14 @@ fn check_plan(
// verify invariant: fields must have unique names
assert_unique_field_names(plan)?;

/* This current fails for:
- execution::context::tests::cross_catalog_access
- at test_files/string/string.slt:46
External error: query failed: DataFusion error: Optimizer rule 'eliminate_nested_union' failed
*/
// verify invariant: equivalent schema across union inputs
// assert_unions_are_valid(check_name, plan)?;

// TODO: trait API and provide extension on the Optimizer to define own validations?
Ok(())
}
Expand Down Expand Up @@ -525,6 +534,24 @@ fn assert_unique_field_names(plan: &LogicalPlan) -> Result<()> {
.map(|_| ())
}

/// Returns an error if any union nodes are invalid.
#[allow(dead_code)]
fn assert_unions_are_valid(rule_name: &str, plan: &LogicalPlan) -> Result<()> {
plan.apply_with_subqueries(|plan: &LogicalPlan| {
if let LogicalPlan::Union(Union { schema, inputs }) = plan {
inputs.iter().try_for_each(|subplan| {
assert_schema_is_the_same(
format!("{rule_name}:union_check").as_str(),
schema,
subplan,
)
})?;
}
Ok(TreeNodeRecursion::Continue)
})
.map(|_| ())
}

#[cfg(test)]
mod tests {
use std::sync::{Arc, Mutex};
Expand Down

0 comments on commit 0163a40

Please sign in to comment.