Skip to content

Commit

Permalink
minor(13525): validate unique field names on query and subquery schem…
Browse files Browse the repository at this point in the history
…as, after each optimizer pass
  • Loading branch information
wiedld committed Dec 4, 2024
1 parent 6d43dc2 commit a855811
Showing 1 changed file with 18 additions and 1 deletion.
19 changes: 18 additions & 1 deletion datafusion/optimizer/src/optimizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ use log::{debug, warn};
use datafusion_common::alias::AliasGenerator;
use datafusion_common::config::ConfigOptions;
use datafusion_common::instant::Instant;
use datafusion_common::tree_node::{Transformed, TreeNodeRewriter};
use datafusion_common::tree_node::{Transformed, TreeNodeRecursion, TreeNodeRewriter};
use datafusion_common::{internal_err, DFSchema, DataFusionError, HashSet, Result};
use datafusion_expr::logical_plan::LogicalPlan;

Expand Down Expand Up @@ -479,6 +479,9 @@ fn check_plan(
// verify invariant: optimizer rule didn't change the schema
assert_schema_is_the_same(check_name, &prev_schema, plan)?;

// verify invariant: fields must have unique names
assert_unique_field_names(plan)?;

// TODO: trait API and provide extension on the Optimizer to define own validations?
Ok(())
}
Expand Down Expand Up @@ -508,6 +511,20 @@ pub(crate) fn assert_schema_is_the_same(
}
}

/// Returns an error if plan, and subplans, do not have unique fields.
///
/// This invariant is subject to change.
/// refer: <https://github.com/apache/datafusion/issues/13525#issuecomment-2494046463>
fn assert_unique_field_names(plan: &LogicalPlan) -> Result<()> {
plan.schema().check_names()?;

plan.apply_with_subqueries(|plan: &LogicalPlan| {
plan.schema().check_names()?;
Ok(TreeNodeRecursion::Continue)
})
.map(|_| ())
}

#[cfg(test)]
mod tests {
use std::sync::{Arc, Mutex};
Expand Down

0 comments on commit a855811

Please sign in to comment.