Skip to content

Commit

Permalink
Incorporate non-null information, and prevent its deletion (#28566)
Browse files Browse the repository at this point in the history
Incorporate nullability information into `Equivalences` from `Get::typ`
information, and harden the equivalence class minimization to avoid
using nullability information from the input types, as this causes its
*deletion* from the equivalence classes.

### Motivation

The `Equivalences` analysis did not introduce nullability statements
when made by `Get` nodes, and it was unfortunately deleting nullability
information when performing expression minimization:
`expr.reduce(&types)` will now (as of
#28018) optimize
**away** expressions based on the nullability information in `types`.
Independent of whether this is intended behavior generally, we can
harden the code here to remove any nullability statements surfaced
through the type, in order to prevent their removal when we invoke
`expr.reduce`.

The outcome is several tests that would experience regressions were we
to remove `ColumnKnowledge` no longer experiencing this.

### Tips for reviewer

<!--
Leave some tips for your reviewer, like:

    * The diff is much smaller if viewed with whitespace hidden.
    * [Some function/module/file] deserves extra attention.
* [Some function/module/file] is pure code movement and only needs a
skim.

Delete this section if no tips.
-->

### Checklist

- [ ] This PR has adequate test coverage / QA involvement has been duly
considered. ([trigger-ci for additional test/nightly
runs](https://trigger-ci.dev.materialize.com/))
- [ ] This PR has an associated up-to-date [design
doc](https://github.com/MaterializeInc/materialize/blob/main/doc/developer/design/README.md),
is a design doc
([template](https://github.com/MaterializeInc/materialize/blob/main/doc/developer/design/00000000_template.md)),
or is sufficiently small to not require a design.
  <!-- Reference the design in the description. -->
- [ ] If this PR evolves [an existing `$T ⇔ Proto$T`
mapping](https://github.com/MaterializeInc/materialize/blob/main/doc/developer/command-and-response-binary-encoding.md)
(possibly in a backwards-incompatible way), then it is tagged with a
`T-proto` label.
- [ ] If this PR will require changes to cloud orchestration or tests,
there is a companion cloud PR to account for those changes that is
tagged with the release-blocker label
([example](MaterializeInc/cloud#5021)).
<!-- Ask in #team-cloud on Slack if you need help preparing the cloud
PR. -->
- [ ] This PR includes the following [user-facing behavior
changes](https://github.com/MaterializeInc/materialize/blob/main/doc/developer/guide-changes.md#what-changes-require-a-release-note):
- <!-- Add release notes here or explicitly state that there are no
user-facing behavior changes. -->
  • Loading branch information
frankmcsherry authored Jul 26, 2024
1 parent e5c5348 commit 45d7809
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 20 deletions.
24 changes: 21 additions & 3 deletions src/transform/src/analysis/equivalences.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ impl Analysis for Equivalences {
}
Some(equivalences)
}
MirRelationExpr::Get { id, .. } => {
MirRelationExpr::Get { id, typ, .. } => {
let mut equivalences = Some(EquivalenceClasses::default());
// Find local identifiers, but nothing for external identifiers.
if let Id::Local(id) = id {
Expand All @@ -94,6 +94,19 @@ impl Analysis for Equivalences {
}
}
}
// Incorporate statements about column nullability.
let mut non_null_cols = vec![MirScalarExpr::literal_false()];
for (index, col_type) in typ.column_types.iter().enumerate() {
if !col_type.nullable {
non_null_cols.push(MirScalarExpr::column(index).call_is_null());
}
}
if non_null_cols.len() > 1 {
if let Some(equivalences) = equivalences.as_mut() {
equivalences.classes.push(non_null_cols);
}
}

equivalences
}
MirRelationExpr::Let { .. } => results.get(index - 1).unwrap().clone(),
Expand Down Expand Up @@ -206,8 +219,13 @@ impl Analysis for Equivalences {
MirRelationExpr::ArrangeBy { .. } => results.get(index - 1).unwrap().clone(),
};

let expr_type = &depends.results::<RelationType>().unwrap()[index];
equivalences.as_mut().map(|e| e.minimize(expr_type));
// Capture the expression type, but remove statements about column nullability.
// Using such statements will optimize away equivalence expressions stating the same.
let mut expr_type = depends.results::<RelationType>().unwrap()[index].clone();
for typ in expr_type.as_mut().unwrap().iter_mut() {
typ.nullable = true;
}
equivalences.as_mut().map(|e| e.minimize(&expr_type));
equivalences
}

Expand Down
28 changes: 11 additions & 17 deletions test/sqllogictest/outer_join_simplification.slt
Original file line number Diff line number Diff line change
Expand Up @@ -838,25 +838,25 @@ select * from c0;
----
Explained Query:
Return // { arity: 3 }
Get l3 // { arity: 3 }
Get l2 // { arity: 3 }
With Mutually Recursive
cte l3 =
cte l2 =
Distinct project=[#0..=#2] // { arity: 3 }
Union // { arity: 3 }
Project (#0..=#2) // { arity: 3 }
Join on=(#0 = #3 AND #1 = #4 AND #7 = case when (#6) IS NULL then null else #5 end) type=delta // { arity: 8 }
ArrangeBy keys=[[#0], [#1]] // { arity: 3 }
Get l3 // { arity: 3 }
Get l2 // { arity: 3 }
ArrangeBy keys=[[#0]] // { arity: 1 }
Union // { arity: 1 }
Get l1 // { arity: 1 }
Get l0 // { arity: 1 }
Threshold // { arity: 1 }
Union // { arity: 1 }
Negate // { arity: 1 }
Get l1 // { arity: 1 }
Get l0 // { arity: 1 }
Distinct project=[#0] // { arity: 1 }
Project (#0) // { arity: 1 }
Get l0 // { arity: 2 }
Get l2 // { arity: 3 }
ArrangeBy keys=[[#0], [case when (#2) IS NULL then null else #1 end]] // { arity: 3 }
Union // { arity: 3 }
Project (#0, #1, #3) // { arity: 3 }
Expand All @@ -870,34 +870,28 @@ Explained Query:
ReadStorage materialize.public.baz // { arity: 3 }
Distinct project=[#0] // { arity: 1 }
Project (#1) // { arity: 1 }
Get l0 // { arity: 2 }
Get l2 // { arity: 3 }
ArrangeBy keys=[[#0]] // { arity: 1 }
Union // { arity: 1 }
Get l2 // { arity: 1 }
Get l1 // { arity: 1 }
Threshold // { arity: 1 }
Union // { arity: 1 }
Negate // { arity: 1 }
Get l2 // { arity: 1 }
Get l1 // { arity: 1 }
Distinct project=[#0] // { arity: 1 }
Union // { arity: 1 }
Project (#1) // { arity: 1 }
ReadStorage materialize.public.baz // { arity: 3 }
Constant // { arity: 1 }
- (null)
ReadStorage materialize.public.foo // { arity: 3 }
cte l2 =
cte l1 =
Project (#0) // { arity: 1 }
ReadStorage materialize.public.quux // { arity: 2 }
cte l1 =
cte l0 =
Project (#0) // { arity: 1 }
Filter (#0) IS NOT NULL // { arity: 2 }
ReadStorage materialize.public.bar // { arity: 2 }
cte l0 =
Union // { arity: 2 }
Project (#0, #1) // { arity: 2 }
Get l3 // { arity: 3 }
Constant // { arity: 2 }
- (null, null)

Source materialize.public.foo
Source materialize.public.bar
Expand Down

0 comments on commit 45d7809

Please sign in to comment.