From 03e39da62e403e064d21b57e9d6c200464c03749 Mon Sep 17 00:00:00 2001 From: Rohan Krishnaswamy <47869999+rkrishn7@users.noreply.github.com> Date: Fri, 13 Dec 2024 07:22:24 -0800 Subject: [PATCH] fix: Implicitly plan `UNNEST` as lateral (#13695) * plan implicit lateral if table factor is UNNEST * check for outer references in `create_relation_subquery` * add sqllogictest * fix lateral constant test to not expect a subquery node * replace sqllogictest in favor of logical plan test * update lateral join sqllogictests * add sqllogictests * fix logical plan test --- datafusion/sql/src/relation/join.rs | 1 + datafusion/sql/src/relation/mod.rs | 6 +++ datafusion/sql/tests/sql_integration.rs | 20 +++++++-- datafusion/sqllogictest/test_files/joins.slt | 20 +++++---- datafusion/sqllogictest/test_files/unnest.slt | 41 +++++++++++++++++++ 5 files changed, 77 insertions(+), 11 deletions(-) diff --git a/datafusion/sql/src/relation/join.rs b/datafusion/sql/src/relation/join.rs index 2ed1197e8fbf..75f39792bce1 100644 --- a/datafusion/sql/src/relation/join.rs +++ b/datafusion/sql/src/relation/join.rs @@ -163,6 +163,7 @@ pub(crate) fn is_lateral(factor: &TableFactor) -> bool { match factor { TableFactor::Derived { lateral, .. } => *lateral, TableFactor::Function { lateral, .. } => *lateral, + TableFactor::UNNEST { .. } => true, _ => false, } } diff --git a/datafusion/sql/src/relation/mod.rs b/datafusion/sql/src/relation/mod.rs index 45a617daae96..8915b0069269 100644 --- a/datafusion/sql/src/relation/mod.rs +++ b/datafusion/sql/src/relation/mod.rs @@ -188,6 +188,12 @@ impl SqlToRel<'_, S> { planner_context.set_outer_query_schema(old_query_schema); planner_context.set_outer_from_schema(Some(old_from_schema)); + // We can omit the subquery wrapper if there are no columns + // referencing the outer scope. + if outer_ref_columns.is_empty() { + return Ok(plan); + } + match plan { LogicalPlan::SubqueryAlias(SubqueryAlias { input, alias, .. }) => { subquery_alias( diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index 8c2d8ebad43f..9363d16c9fc9 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -3129,9 +3129,8 @@ fn lateral_constant() { \n Cross Join: \ \n TableScan: j1\ \n SubqueryAlias: j2\ - \n Subquery:\ - \n Projection: Int64(1)\ - \n EmptyRelation"; + \n Projection: Int64(1)\ + \n EmptyRelation"; quick_test(sql, expected); } @@ -3230,6 +3229,21 @@ fn lateral_nested_left_join() { quick_test(sql, expected); } +#[test] +fn lateral_unnest() { + let sql = "SELECT * from unnest_table u, unnest(u.array_col)"; + let expected = "Projection: *\ + \n Cross Join: \ + \n SubqueryAlias: u\ + \n TableScan: unnest_table\ + \n Subquery:\ + \n Projection: __unnest_placeholder(outer_ref(u.array_col),depth=1) AS UNNEST(outer_ref(u.array_col))\ + \n Unnest: lists[__unnest_placeholder(outer_ref(u.array_col))|depth=1] structs[]\ + \n Projection: outer_ref(u.array_col) AS __unnest_placeholder(outer_ref(u.array_col))\ + \n EmptyRelation"; + quick_test(sql, expected); +} + #[test] fn hive_aggregate_with_filter() -> Result<()> { let dialect = &HiveDialect {}; diff --git a/datafusion/sqllogictest/test_files/joins.slt b/datafusion/sqllogictest/test_files/joins.slt index 49aaa877caa6..68426f180d99 100644 --- a/datafusion/sqllogictest/test_files/joins.slt +++ b/datafusion/sqllogictest/test_files/joins.slt @@ -4058,10 +4058,12 @@ logical_plan 03)----TableScan: join_t1 projection=[t1_id, t1_name] 04)--SubqueryAlias: series 05)----Subquery: -06)------Projection: __unnest_placeholder(generate_series(Int64(1),outer_ref(t1.t1_int)),depth=1) AS i -07)--------Unnest: lists[__unnest_placeholder(generate_series(Int64(1),outer_ref(t1.t1_int)))|depth=1] structs[] -08)----------Projection: generate_series(Int64(1), CAST(outer_ref(t1.t1_int) AS Int64)) AS __unnest_placeholder(generate_series(Int64(1),outer_ref(t1.t1_int))) -09)------------EmptyRelation +06)------Projection: UNNEST(generate_series(Int64(1),outer_ref(t1.t1_int))) AS i +07)--------Subquery: +08)----------Projection: __unnest_placeholder(generate_series(Int64(1),outer_ref(t1.t1_int)),depth=1) AS UNNEST(generate_series(Int64(1),outer_ref(t1.t1_int))) +09)------------Unnest: lists[__unnest_placeholder(generate_series(Int64(1),outer_ref(t1.t1_int)))|depth=1] structs[] +10)--------------Projection: generate_series(Int64(1), CAST(outer_ref(t1.t1_int) AS Int64)) AS __unnest_placeholder(generate_series(Int64(1),outer_ref(t1.t1_int))) +11)----------------EmptyRelation physical_plan_error This feature is not implemented: Physical plan does not support logical expression OuterReferenceColumn(UInt32, Column { relation: Some(Bare { table: "t1" }), name: "t1_int" }) @@ -4081,10 +4083,12 @@ logical_plan 03)----TableScan: join_t1 projection=[t1_id, t1_name] 04)--SubqueryAlias: series 05)----Subquery: -06)------Projection: __unnest_placeholder(generate_series(Int64(1),outer_ref(t2.t1_int)),depth=1) AS i -07)--------Unnest: lists[__unnest_placeholder(generate_series(Int64(1),outer_ref(t2.t1_int)))|depth=1] structs[] -08)----------Projection: generate_series(Int64(1), CAST(outer_ref(t2.t1_int) AS Int64)) AS __unnest_placeholder(generate_series(Int64(1),outer_ref(t2.t1_int))) -09)------------EmptyRelation +06)------Projection: UNNEST(generate_series(Int64(1),outer_ref(t2.t1_int))) AS i +07)--------Subquery: +08)----------Projection: __unnest_placeholder(generate_series(Int64(1),outer_ref(t2.t1_int)),depth=1) AS UNNEST(generate_series(Int64(1),outer_ref(t2.t1_int))) +09)------------Unnest: lists[__unnest_placeholder(generate_series(Int64(1),outer_ref(t2.t1_int)))|depth=1] structs[] +10)--------------Projection: generate_series(Int64(1), CAST(outer_ref(t2.t1_int) AS Int64)) AS __unnest_placeholder(generate_series(Int64(1),outer_ref(t2.t1_int))) +11)----------------EmptyRelation physical_plan_error This feature is not implemented: Physical plan does not support logical expression OuterReferenceColumn(UInt32, Column { relation: Some(Bare { table: "t2" }), name: "t1_int" }) diff --git a/datafusion/sqllogictest/test_files/unnest.slt b/datafusion/sqllogictest/test_files/unnest.slt index 1c54006bd2a0..2685e18427ca 100644 --- a/datafusion/sqllogictest/test_files/unnest.slt +++ b/datafusion/sqllogictest/test_files/unnest.slt @@ -860,6 +860,47 @@ select count(*) from (select unnest(range(0, 100000)) id) t inner join (select u ---- 100000 +# Test implicit LATERAL support for UNNEST +# Issue: https://github.com/apache/datafusion/issues/13659 +# TODO: https://github.com/apache/datafusion/issues/10048 +query error DataFusion error: This feature is not implemented: Physical plan does not support logical expression OuterReferenceColumn\(List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), Column \{ relation: Some\(Bare \{ table: "u" \}\), name: "column1" \}\) +select * from unnest_table u, unnest(u.column1); + +# Test implicit LATERAL support for UNNEST (INNER JOIN) +query error DataFusion error: This feature is not implemented: Physical plan does not support logical expression OuterReferenceColumn\(List\(Field \{ name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: \{\} \}\), Column \{ relation: Some\(Bare \{ table: "u" \}\), name: "column1" \}\) +select * from unnest_table u INNER JOIN unnest(u.column1) AS t(column1) ON u.column3 = t.column1; + +# Test implicit LATERAL planning for UNNEST +query TT +explain select * from unnest_table u, unnest(u.column1); +---- +logical_plan +01)Cross Join: +02)--SubqueryAlias: u +03)----TableScan: unnest_table projection=[column1, column2, column3, column4, column5] +04)--Subquery: +05)----Projection: __unnest_placeholder(outer_ref(u.column1),depth=1) AS UNNEST(outer_ref(u.column1)) +06)------Unnest: lists[__unnest_placeholder(outer_ref(u.column1))|depth=1] structs[] +07)--------Projection: outer_ref(u.column1) AS __unnest_placeholder(outer_ref(u.column1)) +08)----------EmptyRelation +physical_plan_error This feature is not implemented: Physical plan does not support logical expression OuterReferenceColumn(List(Field { name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), Column { relation: Some(Bare { table: "u" }), name: "column1" }) + +# Test implicit LATERAL planning for UNNEST (INNER JOIN) +query TT +explain select * from unnest_table u INNER JOIN unnest(u.column1) AS t(column1) ON u.column3 = t.column1; +---- +logical_plan +01)Inner Join: u.column3 = t.column1 +02)--SubqueryAlias: u +03)----TableScan: unnest_table projection=[column1, column2, column3, column4, column5] +04)--SubqueryAlias: t +05)----Subquery: +06)------Projection: __unnest_placeholder(outer_ref(u.column1),depth=1) AS column1 +07)--------Unnest: lists[__unnest_placeholder(outer_ref(u.column1))|depth=1] structs[] +08)----------Projection: outer_ref(u.column1) AS __unnest_placeholder(outer_ref(u.column1)) +09)------------EmptyRelation +physical_plan_error This feature is not implemented: Physical plan does not support logical expression OuterReferenceColumn(List(Field { name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), Column { relation: Some(Bare { table: "u" }), name: "column1" }) + ## Unnest in subquery query IIII