Skip to content

Commit

Permalink
Add IS [NOT] DISTINCT FROM to SQL and join matchers. (#14976)
Browse files Browse the repository at this point in the history
* Add IS [NOT] DISTINCT FROM to SQL and join matchers.

Changes:

1) Add "isdistinctfrom" and "notdistinctfrom" native expressions.

2) Add "IS [NOT] DISTINCT FROM" to SQL. It uses the new native expressions
   when generating expressions, and is treated the same as equals and
   not-equals when generating native filters on literals.

3) Update join matchers to have an "includeNull" parameter that determines
   whether we are operating in "equals" mode or "is not distinct from"
   mode.

* Main changes:

- Add ARRAY handling to "notdistinctfrom" and "isdistinctfrom".
- Include null in pushed-down filters when using "notdistinctfrom" in a join.

Other changes:
- Adjust join filter analyzer to more explicitly use InDimFilter's ValuesSets,
  relying less on remembering to get it right to avoid copies.

* Remove unused "wrap" method.

* Fixes.

* Remove methods we do not need.

* Fix bug with INPUT_REF.
  • Loading branch information
gianm authored Sep 20, 2023
1 parent 79f882f commit 823f620
Show file tree
Hide file tree
Showing 40 changed files with 1,288 additions and 318 deletions.
2 changes: 2 additions & 0 deletions docs/querying/sql-operators.md
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,9 @@ Also see the [CONCAT function](sql-scalar.md#string-functions).
|Operator|Description|
|--------|-----------|
|`x = y` |Equal to|
|`x IS NOT DISTINCT FROM y`|Equal to, considering `NULL` as a value. Never returns `NULL`.|
|`x <> y`|Not equal to|
|`x IS DISTINCT FROM y`|Not equal to, considering `NULL` as a value. Never returns `NULL`.|
|`x > y` |Greater than|
|`x >= y`|Greater than or equal to|
|`x < y` |Less than|
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ private static JoinAlgorithm deduceJoinAlgorithm(JoinAlgorithm preferredJoinAlgo
JoinAlgorithm deducedJoinAlgorithm;
if (JoinAlgorithm.BROADCAST.equals(preferredJoinAlgorithm)) {
deducedJoinAlgorithm = JoinAlgorithm.BROADCAST;
} else if (isConditionEqualityOnLeftAndRightColumns(joinDataSource.getConditionAnalysis())) {
} else if (canUseSortMergeJoin(joinDataSource.getConditionAnalysis())) {
deducedJoinAlgorithm = JoinAlgorithm.SORT_MERGE;
} else {
deducedJoinAlgorithm = JoinAlgorithm.BROADCAST;
Expand All @@ -237,15 +237,21 @@ private static JoinAlgorithm deduceJoinAlgorithm(JoinAlgorithm preferredJoinAlgo
}

/**
* Checks if the join condition on two tables "table1" and "table2" is of the form
* Checks if the sortMerge algorithm can execute a particular join condition.
*
* Two checks:
* (1) join condition on two tables "table1" and "table2" is of the form
* table1.columnA = table2.columnA && table1.columnB = table2.columnB && ....
* sortMerge algorithm can help these types of join conditions
*
* (2) join condition uses equals, not IS NOT DISTINCT FROM [sortMerge processor does not currently implement
* IS NOT DISTINCT FROM]
*/
private static boolean isConditionEqualityOnLeftAndRightColumns(JoinConditionAnalysis joinConditionAnalysis)
private static boolean canUseSortMergeJoin(JoinConditionAnalysis joinConditionAnalysis)
{
return joinConditionAnalysis.getEquiConditions()
.stream()
.allMatch(equality -> equality.getLeftExpr().isIdentifier());
return joinConditionAnalysis
.getEquiConditions()
.stream()
.allMatch(equality -> equality.getLeftExpr().isIdentifier() && !equality.isIncludeNull());
}

/**
Expand Down
50 changes: 46 additions & 4 deletions processing/src/main/java/org/apache/druid/math/expr/Exprs.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,13 @@

package org.apache.druid.math.expr;

import org.apache.druid.java.util.common.Pair;
import org.apache.druid.java.util.common.UOE;
import org.apache.druid.segment.join.Equality;
import org.apache.druid.segment.join.JoinPrefixUtils;

import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.Stack;

Expand Down Expand Up @@ -79,16 +81,56 @@ public static List<Expr> decomposeAnd(final Expr expr)
}

/**
* Decomposes an equality expr into the left- and right-hand side.
* Decomposes an equality expr into an {@link Equality}. Used by join-related code to identify equi-joins.
*
* @return decomposed equality, or empty if the input expr was not an equality expr
*/
public static Optional<Pair<Expr, Expr>> decomposeEquals(final Expr expr)
public static Optional<Equality> decomposeEquals(final Expr expr, final String rightPrefix)
{
final Expr lhs;
final Expr rhs;
final boolean includeNull;

if (expr instanceof BinEqExpr) {
return Optional.of(Pair.of(((BinEqExpr) expr).left, ((BinEqExpr) expr).right));
lhs = ((BinEqExpr) expr).left;
rhs = ((BinEqExpr) expr).right;
includeNull = false;
} else if (expr instanceof FunctionExpr
&& ((FunctionExpr) expr).function instanceof Function.IsNotDistinctFromFunc) {
final List<Expr> args = ((FunctionExpr) expr).args;
lhs = args.get(0);
rhs = args.get(1);
includeNull = true;
} else {
return Optional.empty();
}

if (isLeftExprAndRightColumn(lhs, rhs, rightPrefix)) {
// rhs is a right-hand column; lhs is an expression solely of the left-hand side.
return Optional.of(
new Equality(
lhs,
Objects.requireNonNull(rhs.getBindingIfIdentifier()).substring(rightPrefix.length()),
includeNull
)
);
} else if (isLeftExprAndRightColumn(rhs, lhs, rightPrefix)) {
return Optional.of(
new Equality(
rhs,
Objects.requireNonNull(lhs.getBindingIfIdentifier()).substring(rightPrefix.length()),
includeNull
)
);
} else {
return Optional.empty();
}
}

private static boolean isLeftExprAndRightColumn(final Expr a, final Expr b, final String rightPrefix)
{
return a.analyzeInputs().getRequiredBindings().stream().noneMatch(c -> JoinPrefixUtils.isPrefixedBy(c, rightPrefix))
&& b.getBindingIfIdentifier() != null
&& JoinPrefixUtils.isPrefixedBy(b.getBindingIfIdentifier(), rightPrefix);
}
}
103 changes: 103 additions & 0 deletions processing/src/main/java/org/apache/druid/math/expr/Function.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

package org.apache.druid.math.expr;

import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableSet;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.java.util.common.DateTimes;
Expand Down Expand Up @@ -2225,6 +2226,108 @@ public <T> ExprVectorProcessor<T> asVectorProcessor(Expr.VectorInputBindingInspe
}
}

/**
* SQL function "x IS NOT DISTINCT FROM y". Very similar to "x = y", i.e. {@link BinEqExpr}, except this function
* never returns null, and this function considers NULL as a value, so NULL itself is not-distinct-from NULL. For
* example: `x == null` returns `null` in SQL-compatible null handling mode, but `notdistinctfrom(x, null)` is
* true if `x` is null.
*/
class IsNotDistinctFromFunc implements Function
{
@Override
public String name()
{
return "notdistinctfrom";
}

@Override
public ExprEval apply(List<Expr> args, Expr.ObjectBinding bindings)
{
final ExprEval leftVal = args.get(0).eval(bindings);
final ExprEval rightVal = args.get(1).eval(bindings);

if (leftVal.value() == null || rightVal.value() == null) {
return ExprEval.ofLongBoolean(leftVal.value() == null && rightVal.value() == null);
}

// Code copied and adapted from BinaryBooleanOpExprBase and BinEqExpr.
// The code isn't shared due to differences in code structure: BinaryBooleanOpExprBase + BinEqExpr have logic
// interleaved between parent and child class, but we can't use BinaryBooleanOpExprBase as a parent here, because
// (a) this is a function, not an expr; and (b) our logic for handling and returning nulls is different from most
// binary exprs, where null in means null out.
final ExpressionType comparisonType = ExpressionTypeConversion.autoDetect(leftVal, rightVal);
switch (comparisonType.getType()) {
case STRING:
return ExprEval.ofLongBoolean(Objects.equals(leftVal.asString(), rightVal.asString()));
case LONG:
return ExprEval.ofLongBoolean(leftVal.asLong() == rightVal.asLong());
case ARRAY:
final ExpressionType type = Preconditions.checkNotNull(
ExpressionTypeConversion.leastRestrictiveType(leftVal.type(), rightVal.type()),
"Cannot be null because ExprEval type is not nullable"
);
return ExprEval.ofLongBoolean(
type.getNullableStrategy().compare(leftVal.castTo(type).asArray(), rightVal.castTo(type).asArray()) == 0
);
case DOUBLE:
default:
if (leftVal.isNumericNull() || rightVal.isNumericNull()) {
return ExprEval.ofLongBoolean(leftVal.isNumericNull() && rightVal.isNumericNull());
} else {
return ExprEval.ofLongBoolean(leftVal.asDouble() == rightVal.asDouble());
}
}
}

@Override
public void validateArguments(List<Expr> args)
{
validationHelperCheckArgumentCount(args, 2);
}

@Nullable
@Override
public ExpressionType getOutputType(Expr.InputBindingInspector inspector, List<Expr> args)
{
return ExpressionType.LONG;
}
}

/**
* SQL function "x IS DISTINCT FROM y". Very similar to "x <> y", i.e. {@link BinNeqExpr}, except this function
* never returns null.
*
* Implemented as a subclass of IsNotDistinctFromFunc to keep the code simple, and because we expect "notdistinctfrom"
* to be more common than "isdistinctfrom" in actual usage.
*/
class IsDistinctFromFunc extends IsNotDistinctFromFunc
{
@Override
public String name()
{
return "isdistinctfrom";
}

@Override
public ExprEval apply(List<Expr> args, Expr.ObjectBinding bindings)
{
return ExprEval.ofLongBoolean(!super.apply(args, bindings).asBoolean());
}

@Override
public void validateArguments(List<Expr> args)
{
validationHelperCheckArgumentCount(args, 2);
}

@Nullable
@Override
public ExpressionType getOutputType(Expr.InputBindingInspector inspector, List<Expr> args)
{
return ExpressionType.LONG;
}
}

/**
* SQL function "IS NOT FALSE". Different from "IS TRUE" in that it returns true for NULL as well.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -659,9 +659,9 @@ public ValuesSet()

/**
* Create a ValuesSet from another Collection. The Collection will be reused if it is a {@link SortedSet} with
* an appropriate comparator.
* the {@link Comparators#naturalNullsFirst()} comparator.
*/
public ValuesSet(final Collection<String> values)
private ValuesSet(final Collection<String> values)
{
if (values instanceof SortedSet && Comparators.naturalNullsFirst()
.equals(((SortedSet<String>) values).comparator())) {
Expand All @@ -672,6 +672,36 @@ public ValuesSet(final Collection<String> values)
}
}

/**
* Creates an empty ValuesSet.
*/
public static ValuesSet create()
{
return new ValuesSet(new TreeSet<>(Comparators.naturalNullsFirst()));
}

/**
* Creates a ValuesSet wrapping the provided single value.
*
* @throws IllegalStateException if the provided collection cannot be wrapped since it has the wrong comparator
*/
public static ValuesSet of(@Nullable final String value)
{
final ValuesSet retVal = ValuesSet.create();
retVal.add(value);
return retVal;
}

/**
* Creates a ValuesSet copying the provided collection.
*/
public static ValuesSet copyOf(final Collection<String> values)
{
final TreeSet<String> copyOfValues = new TreeSet<>(Comparators.naturalNullsFirst());
copyOfValues.addAll(values);
return new ValuesSet(copyOfValues);
}

public SortedSet<ByteBuffer> toUtf8()
{
final TreeSet<ByteBuffer> valuesUtf8 = new TreeSet<>(ByteBufferUtils.utf8Comparator());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,13 @@ public class Equality
{
private final Expr leftExpr;
private final String rightColumn;
private final boolean includeNull;

public Equality(final Expr leftExpr, final String rightColumn)
public Equality(final Expr leftExpr, final String rightColumn, final boolean includeNull)
{
this.leftExpr = leftExpr;
this.rightColumn = rightColumn;
this.includeNull = includeNull;
}

public Expr getLeftExpr()
Expand All @@ -49,12 +51,22 @@ public String getRightColumn()
return rightColumn;
}

/**
* Whether null is treated as a value that can be equal to itself. True for conditions using "IS NOT DISTINCT FROM",
* false for conditions using regular equals.
*/
public boolean isIncludeNull()
{
return includeNull;
}

@Override
public String toString()
{
return "Equality{" +
"leftExpr=" + leftExpr +
", rightColumn='" + rightColumn + '\'' +
", includeNull=" + includeNull +
'}';
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
package org.apache.druid.segment.join;

import com.google.common.base.Preconditions;
import org.apache.druid.java.util.common.Pair;
import org.apache.druid.math.expr.Expr;
import org.apache.druid.math.expr.ExprMacroTable;
import org.apache.druid.math.expr.Exprs;
Expand Down Expand Up @@ -121,40 +120,18 @@ public static JoinConditionAnalysis forExpression(

final List<Expr> exprs = Exprs.decomposeAnd(conditionExpr);
for (Expr childExpr : exprs) {
final Optional<Pair<Expr, Expr>> maybeDecomposed = Exprs.decomposeEquals(childExpr);
final Optional<Equality> maybeEquality = Exprs.decomposeEquals(childExpr, rightPrefix);

if (!maybeDecomposed.isPresent()) {
if (!maybeEquality.isPresent()) {
nonEquiConditions.add(childExpr);
} else {
final Pair<Expr, Expr> decomposed = maybeDecomposed.get();
final Expr lhs = Objects.requireNonNull(decomposed.lhs);
final Expr rhs = Objects.requireNonNull(decomposed.rhs);

if (isLeftExprAndRightColumn(lhs, rhs, rightPrefix)) {
// rhs is a right-hand column; lhs is an expression solely of the left-hand side.
equiConditions.add(
new Equality(lhs, Objects.requireNonNull(rhs.getBindingIfIdentifier()).substring(rightPrefix.length()))
);
} else if (isLeftExprAndRightColumn(rhs, lhs, rightPrefix)) {
equiConditions.add(
new Equality(rhs, Objects.requireNonNull(lhs.getBindingIfIdentifier()).substring(rightPrefix.length()))
);
} else {
nonEquiConditions.add(childExpr);
}
equiConditions.add(maybeEquality.get());
}
}

return new JoinConditionAnalysis(condition, rightPrefix, equiConditions, nonEquiConditions);
}

private static boolean isLeftExprAndRightColumn(final Expr a, final Expr b, final String rightPrefix)
{
return a.analyzeInputs().getRequiredBindings().stream().noneMatch(c -> JoinPrefixUtils.isPrefixedBy(c, rightPrefix))
&& b.getBindingIfIdentifier() != null
&& JoinPrefixUtils.isPrefixedBy(b.getBindingIfIdentifier(), rightPrefix);
}

/**
* Return the condition expression.
*/
Expand Down
Loading

0 comments on commit 823f620

Please sign in to comment.