From a02d07b8017ea567f4a1b294bca6c704f9586985 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Wed, 18 Oct 2023 13:07:35 -0700 Subject: [PATCH] add native filters for "(filter) is true" and "(filter) is false" (#15182) * add native filters for "(filter) is true" and "(filter) is false" changes: * add IsTrueDimFilter, IsFalseDimFilter, and abstract IsBooleanDimFilter for native json filter implementations of `(filter) IS TRUE` and `(filter) IS FALSE` * add IsBooleanFilter for actual filtering logic for these filters, which ignore includeUnknown to always use matches with false for true and !matches with true for false * fix test incorrectly adjusted to wrong answer in #15058 * add tests for default value mode --- .../apache/druid/query/filter/DimFilter.java | 4 +- .../druid/query/filter/DimFilterUtils.java | 2 + .../query/filter/IsBooleanDimFilter.java | 120 ++++++++++ .../druid/query/filter/IsFalseDimFilter.java | 45 ++++ .../druid/query/filter/IsTrueDimFilter.java | 45 ++++ .../segment/filter/ExpressionFilter.java | 2 +- .../druid/segment/filter/IsBooleanFilter.java | 210 ++++++++++++++++++ .../query/filter/IsBooleanDimFilterTest.java | 93 ++++++++ .../segment/filter/EqualityFilterTests.java | 44 ++++ .../sql/calcite/expression/Expressions.java | 68 ++++-- .../calcite/filtration/BottomUpTransform.java | 18 ++ .../sql/calcite/filtration/Filtration.java | 4 +- .../filtration/RemoveRedundantIsTrue.java | 100 +++++++++ .../sql/calcite/BaseCalciteQueryTest.java | 6 + .../sql/calcite/CalciteArraysQueryTest.java | 4 +- .../sql/calcite/CalciteJoinQueryTest.java | 9 +- .../druid/sql/calcite/CalciteQueryTest.java | 18 +- 17 files changed, 763 insertions(+), 29 deletions(-) create mode 100644 processing/src/main/java/org/apache/druid/query/filter/IsBooleanDimFilter.java create mode 100644 processing/src/main/java/org/apache/druid/query/filter/IsFalseDimFilter.java create mode 100644 processing/src/main/java/org/apache/druid/query/filter/IsTrueDimFilter.java create mode 100644 processing/src/main/java/org/apache/druid/segment/filter/IsBooleanFilter.java create mode 100644 processing/src/test/java/org/apache/druid/query/filter/IsBooleanDimFilterTest.java create mode 100644 sql/src/main/java/org/apache/druid/sql/calcite/filtration/RemoveRedundantIsTrue.java diff --git a/processing/src/main/java/org/apache/druid/query/filter/DimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/DimFilter.java index 4e4a3b10ec940..95c7b78862b99 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/DimFilter.java +++ b/processing/src/main/java/org/apache/druid/query/filter/DimFilter.java @@ -51,7 +51,9 @@ @JsonSubTypes.Type(name = "false", value = FalseDimFilter.class), @JsonSubTypes.Type(name = "null", value = NullFilter.class), @JsonSubTypes.Type(name = "equals", value = EqualityFilter.class), - @JsonSubTypes.Type(name = "range", value = RangeFilter.class) + @JsonSubTypes.Type(name = "range", value = RangeFilter.class), + @JsonSubTypes.Type(name = "isfalse", value = IsFalseDimFilter.class), + @JsonSubTypes.Type(name = "istrue", value = IsTrueDimFilter.class) }) public interface DimFilter extends Cacheable { diff --git a/processing/src/main/java/org/apache/druid/query/filter/DimFilterUtils.java b/processing/src/main/java/org/apache/druid/query/filter/DimFilterUtils.java index 27a0581d47522..ed03efac38a63 100644 --- a/processing/src/main/java/org/apache/druid/query/filter/DimFilterUtils.java +++ b/processing/src/main/java/org/apache/druid/query/filter/DimFilterUtils.java @@ -58,6 +58,8 @@ public class DimFilterUtils static final byte EQUALS_CACHE_ID = 0x13; static final byte RANGE_CACHE_ID = 0x14; + static final byte IS_FILTER_BOOLEAN_FILTER_CACHE_ID = 0x15; + public static final byte STRING_SEPARATOR = (byte) 0xFF; diff --git a/processing/src/main/java/org/apache/druid/query/filter/IsBooleanDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/IsBooleanDimFilter.java new file mode 100644 index 0000000000000..4b766de3af92d --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/filter/IsBooleanDimFilter.java @@ -0,0 +1,120 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.filter; + +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.collect.RangeSet; +import org.apache.druid.error.DruidException; +import org.apache.druid.query.cache.CacheKeyBuilder; +import org.apache.druid.segment.filter.IsBooleanFilter; + +import java.util.Objects; +import java.util.Set; + +/** + * Abstract SQL three-value logic wrapper for some child {@link DimFilter} to implement '{filter} IS TRUE' and + * '{filter} IS FALSE'. + * + * @see IsTrueDimFilter - IS TRUE + * @see IsFalseDimFilter - IS FALSE + * @see IsBooleanFilter - actual filtering logic + */ +public abstract class IsBooleanDimFilter extends AbstractOptimizableDimFilter +{ + private final DimFilter field; + private final boolean isTrue; + + public IsBooleanDimFilter( + DimFilter field, + boolean isTrue + ) + { + if (field == null) { + throw DruidException.forPersona(DruidException.Persona.USER) + .ofCategory(DruidException.Category.INVALID_INPUT) + .build("IS %s operator requires a non-null filter for field", isTrue ? "TRUE" : "FALSE"); + } + this.field = field; + this.isTrue = isTrue; + } + + @JsonProperty("field") + public DimFilter getField() + { + return field; + } + + @Override + public byte[] getCacheKey() + { + return new CacheKeyBuilder(DimFilterUtils.IS_FILTER_BOOLEAN_FILTER_CACHE_ID).appendBoolean(isTrue) + .appendCacheable(field) + .build(); + } + + @Override + public Filter toFilter() + { + return new IsBooleanFilter(field.toFilter(), isTrue); + } + + @Override + public RangeSet getDimensionRangeSet(String dimension) + { + return null; + } + + @Override + public Set getRequiredColumns() + { + return field.getRequiredColumns(); + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + + IsBooleanDimFilter that = (IsBooleanDimFilter) o; + + if (field != null ? !field.equals(that.field) : that.field != null) { + return false; + } + + return isTrue == that.isTrue; + } + + @Override + public int hashCode() + { + return Objects.hash(field, isTrue); + } + + @Override + public String toString() + { + return "(" + field + ") IS " + (isTrue ? "TRUE" : "FALSE"); + } +} diff --git a/processing/src/main/java/org/apache/druid/query/filter/IsFalseDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/IsFalseDimFilter.java new file mode 100644 index 0000000000000..7e674869a883b --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/filter/IsFalseDimFilter.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.filter; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; + +public class IsFalseDimFilter extends IsBooleanDimFilter +{ + public static IsFalseDimFilter of(DimFilter field) + { + return new IsFalseDimFilter(field); + } + + @JsonCreator + public IsFalseDimFilter( + @JsonProperty("field") DimFilter field + ) + { + super(field, false); + } + + @Override + public DimFilter optimize() + { + return new IsFalseDimFilter(getField().optimize()); + } +} diff --git a/processing/src/main/java/org/apache/druid/query/filter/IsTrueDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/IsTrueDimFilter.java new file mode 100644 index 0000000000000..a61897f10f0f2 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/query/filter/IsTrueDimFilter.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.filter; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; + +public class IsTrueDimFilter extends IsBooleanDimFilter +{ + public static IsTrueDimFilter of(DimFilter field) + { + return new IsTrueDimFilter(field); + } + + @JsonCreator + public IsTrueDimFilter( + @JsonProperty("field") DimFilter field + ) + { + super(field, true); + } + + @Override + public DimFilter optimize() + { + return new IsTrueDimFilter(getField().optimize()); + } +} diff --git a/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java index 6d2bfab4b6a3f..1208865d4ce15 100644 --- a/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java +++ b/processing/src/main/java/org/apache/druid/segment/filter/ExpressionFilter.java @@ -186,7 +186,7 @@ public boolean matches(boolean includeUnknown) return Arrays.stream(dResult).filter(Objects::nonNull).anyMatch(o -> Evals.asBoolean((double) o)); } } - return (includeUnknown && eval.value() == null) || eval.asBoolean(); + return eval.asBoolean(); } @Override diff --git a/processing/src/main/java/org/apache/druid/segment/filter/IsBooleanFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/IsBooleanFilter.java new file mode 100644 index 0000000000000..ddf3972ccff82 --- /dev/null +++ b/processing/src/main/java/org/apache/druid/segment/filter/IsBooleanFilter.java @@ -0,0 +1,210 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.segment.filter; + +import org.apache.druid.common.config.NullHandling; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.query.BitmapResultFactory; +import org.apache.druid.query.filter.ColumnIndexSelector; +import org.apache.druid.query.filter.Filter; +import org.apache.druid.query.filter.ValueMatcher; +import org.apache.druid.query.filter.vector.BaseVectorValueMatcher; +import org.apache.druid.query.filter.vector.ReadableVectorMatch; +import org.apache.druid.query.filter.vector.VectorMatch; +import org.apache.druid.query.filter.vector.VectorValueMatcher; +import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; +import org.apache.druid.segment.ColumnInspector; +import org.apache.druid.segment.ColumnSelector; +import org.apache.druid.segment.ColumnSelectorFactory; +import org.apache.druid.segment.column.ColumnIndexCapabilities; +import org.apache.druid.segment.index.BitmapColumnIndex; +import org.apache.druid.segment.vector.VectorColumnSelectorFactory; + +import javax.annotation.Nullable; +import java.util.Map; +import java.util.Objects; +import java.util.Set; + +/** + * SQL three-value logic wrapper for some child {@link Filter} to implement '{filter} IS TRUE' and + * '{filter} IS FALSE'. Primarily useful when living beneath a {@link NotFilter} because this filter purposely ignores + * the value of {@code includeUnknown} and so always correctly only returns values that definitely match or do not match + * the filter to produce correct results for '{filter} IS NOT TRUE' and '{filter} IS NOT FALSE'. This filter is a + * relatively thin wrapper, so should be relatively harmless if used without a 'NOT' filter. + * + * @see org.apache.druid.query.filter.IsBooleanDimFilter + * @see org.apache.druid.query.filter.IsTrueDimFilter + * @see org.apache.druid.query.filter.IsFalseDimFilter + */ +public class IsBooleanFilter implements Filter +{ + private final Filter baseFilter; + private final boolean isTrue; + + public IsBooleanFilter(Filter baseFilter, boolean isTrue) + { + this.baseFilter = baseFilter; + this.isTrue = isTrue; + } + + @Nullable + @Override + public BitmapColumnIndex getBitmapColumnIndex(ColumnIndexSelector selector) + { + final BitmapColumnIndex baseIndex = baseFilter.getBitmapColumnIndex(selector); + if (baseIndex != null && (isTrue || baseIndex.getIndexCapabilities().isInvertible())) { + return new BitmapColumnIndex() + { + private final boolean useThreeValueLogic = NullHandling.useThreeValueLogic(); + @Override + public ColumnIndexCapabilities getIndexCapabilities() + { + return baseIndex.getIndexCapabilities(); + } + + @Override + public double estimateSelectivity(int totalRows) + { + return 1. - baseFilter.estimateSelectivity(selector); + } + + @Override + public T computeBitmapResult(BitmapResultFactory bitmapResultFactory, boolean includeUnknown) + { + if (isTrue) { + return baseIndex.computeBitmapResult(bitmapResultFactory, false); + } + return bitmapResultFactory.complement( + baseIndex.computeBitmapResult(bitmapResultFactory, useThreeValueLogic), + selector.getNumRows() + ); + } + }; + } + return null; + } + + @Override + public ValueMatcher makeMatcher(ColumnSelectorFactory factory) + { + final ValueMatcher baseMatcher = baseFilter.makeMatcher(factory); + + return new ValueMatcher() + { + private final boolean useThreeValueLogic = NullHandling.useThreeValueLogic(); + @Override + public boolean matches(boolean includeUnknown) + { + if (isTrue) { + return baseMatcher.matches(false); + } + return !baseMatcher.matches(useThreeValueLogic); + } + + @Override + public void inspectRuntimeShape(RuntimeShapeInspector inspector) + { + inspector.visit("baseMatcher", baseMatcher); + } + }; + } + + @Override + public VectorValueMatcher makeVectorMatcher(final VectorColumnSelectorFactory factory) + { + final VectorValueMatcher baseMatcher = baseFilter.makeVectorMatcher(factory); + + return new BaseVectorValueMatcher(baseMatcher) + { + private final VectorMatch scratch = VectorMatch.wrap(new int[factory.getMaxVectorSize()]); + private final boolean useThreeValueLogic = NullHandling.useThreeValueLogic(); + + @Override + public ReadableVectorMatch match(final ReadableVectorMatch mask, boolean includeUnknown) + { + if (isTrue) { + return baseMatcher.match(mask, false); + } + final ReadableVectorMatch baseMatch = baseMatcher.match(mask, useThreeValueLogic); + + scratch.copyFrom(mask); + scratch.removeAll(baseMatch); + assert scratch.isValid(mask); + return scratch; + } + }; + } + + @Override + public boolean canVectorizeMatcher(ColumnInspector inspector) + { + return baseFilter.canVectorizeMatcher(inspector); + } + + @Override + public Set getRequiredColumns() + { + return baseFilter.getRequiredColumns(); + } + + @Override + public boolean supportsRequiredColumnRewrite() + { + return baseFilter.supportsRequiredColumnRewrite(); + } + + @Override + public Filter rewriteRequiredColumns(Map columnRewrites) + { + return new IsBooleanFilter(baseFilter.rewriteRequiredColumns(columnRewrites), isTrue); + } + + @Override + public boolean supportsSelectivityEstimation(ColumnSelector columnSelector, ColumnIndexSelector indexSelector) + { + return baseFilter.supportsSelectivityEstimation(columnSelector, indexSelector); + } + + @Override + public String toString() + { + return StringUtils.format("(%s) IS %s", baseFilter, isTrue ? "TRUE" : "FALSE"); + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + IsBooleanFilter isFilter = (IsBooleanFilter) o; + return Objects.equals(baseFilter, isFilter.baseFilter); + } + + @Override + public int hashCode() + { + // to return a different hash from baseFilter + return Objects.hash(1, baseFilter, isTrue); + } +} diff --git a/processing/src/test/java/org/apache/druid/query/filter/IsBooleanDimFilterTest.java b/processing/src/test/java/org/apache/druid/query/filter/IsBooleanDimFilterTest.java new file mode 100644 index 0000000000000..7c4be474efefc --- /dev/null +++ b/processing/src/test/java/org/apache/druid/query/filter/IsBooleanDimFilterTest.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.query.filter; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import nl.jqno.equalsverifier.EqualsVerifier; +import org.apache.druid.error.DruidException; +import org.apache.druid.jackson.DefaultObjectMapper; +import org.apache.druid.segment.column.ColumnType; +import org.apache.druid.testing.InitializedNullHandlingTest; +import org.junit.Assert; +import org.junit.Test; + +import java.util.Arrays; + +public class IsBooleanDimFilterTest extends InitializedNullHandlingTest +{ + @Test + public void testSerde() throws JsonProcessingException + { + ObjectMapper mapper = new DefaultObjectMapper(); + EqualityFilter baseFilter = new EqualityFilter("x", ColumnType.STRING, "hello", null); + + IsTrueDimFilter trueFilter = IsTrueDimFilter.of(baseFilter); + String s = mapper.writeValueAsString(trueFilter); + Assert.assertEquals(trueFilter, mapper.readValue(s, IsTrueDimFilter.class)); + + IsFalseDimFilter falseFilter = IsFalseDimFilter.of(baseFilter); + s = mapper.writeValueAsString(falseFilter); + Assert.assertEquals(falseFilter, mapper.readValue(s, IsFalseDimFilter.class)); + + } + + @Test + public void testGetCacheKey() + { + EqualityFilter f1 = new EqualityFilter("x", ColumnType.STRING, "hello", null); + EqualityFilter f1_2 = new EqualityFilter("x", ColumnType.STRING, "hello", null); + EqualityFilter f2 = new EqualityFilter("x", ColumnType.STRING, "world", null); + EqualityFilter f3 = new EqualityFilter("x", ColumnType.STRING, "hello", new FilterTuning(true, null, null)); + Assert.assertArrayEquals(f1.getCacheKey(), f1_2.getCacheKey()); + Assert.assertFalse(Arrays.equals(f1.getCacheKey(), f2.getCacheKey())); + Assert.assertArrayEquals(f1.getCacheKey(), f3.getCacheKey()); + + } + + @Test + public void testInvalidParameters() + { + Throwable t = Assert.assertThrows( + DruidException.class, + () -> new IsTrueDimFilter(null) + ); + Assert.assertEquals("IS TRUE operator requires a non-null filter for field", t.getMessage()); + t = Assert.assertThrows( + DruidException.class, + () -> new IsFalseDimFilter(null) + ); + Assert.assertEquals("IS FALSE operator requires a non-null filter for field", t.getMessage()); + } + + @Test + public void test_equals() + { + EqualsVerifier.forClass(IsTrueDimFilter.class).usingGetClass() + .withNonnullFields("field") + .withIgnoredFields("cachedOptimizedFilter") + .verify(); + + EqualsVerifier.forClass(IsFalseDimFilter.class).usingGetClass() + .withNonnullFields("field") + .withIgnoredFields("cachedOptimizedFilter") + .verify(); + } +} diff --git a/processing/src/test/java/org/apache/druid/segment/filter/EqualityFilterTests.java b/processing/src/test/java/org/apache/druid/segment/filter/EqualityFilterTests.java index 5403ac7aca6cd..06ca5ab3fd481 100644 --- a/processing/src/test/java/org/apache/druid/segment/filter/EqualityFilterTests.java +++ b/processing/src/test/java/org/apache/druid/segment/filter/EqualityFilterTests.java @@ -37,6 +37,8 @@ import org.apache.druid.math.expr.ExpressionType; import org.apache.druid.query.filter.EqualityFilter; import org.apache.druid.query.filter.FilterTuning; +import org.apache.druid.query.filter.IsFalseDimFilter; +import org.apache.druid.query.filter.IsTrueDimFilter; import org.apache.druid.query.filter.NotDimFilter; import org.apache.druid.segment.IndexBuilder; import org.apache.druid.segment.StorageAdapter; @@ -255,6 +257,26 @@ public void testSingleValueStringColumnWithNulls() NotDimFilter.of(new EqualityFilter("s0", ColumnType.STRING, "a", null)), ImmutableList.of("0", "2", "4") ); + // "(s0 = 'a') is not true", same rows as "s0 <> 'a'", but also with null rows + assertFilterMatches( + NotDimFilter.of(IsTrueDimFilter.of(new EqualityFilter("s0", ColumnType.STRING, "a", null))), + ImmutableList.of("0", "2", "3", "4") + ); + // "(s0 = 'a') is true", equivalent to "s0 = 'a'" + assertFilterMatches( + IsTrueDimFilter.of(new EqualityFilter("s0", ColumnType.STRING, "a", null)), + ImmutableList.of("1", "5") + ); + // "(s0 = 'a') is false", equivalent results to "s0 <> 'a'" + assertFilterMatches( + IsFalseDimFilter.of(new EqualityFilter("s0", ColumnType.STRING, "a", null)), + ImmutableList.of("0", "2", "4") + ); + // "(s0 = 'a') is not false", same rows as "s0 = 'a'", but also with null rows + assertFilterMatches( + NotDimFilter.of(IsFalseDimFilter.of(new EqualityFilter("s0", ColumnType.STRING, "a", null))), + ImmutableList.of("1", "3", "5") + ); try { // make sure if 3vl is disabled with behave with 2vl @@ -288,6 +310,28 @@ public void testSingleValueStringColumnWithNulls() NotDimFilter.of(new EqualityFilter("s0", ColumnType.STRING, "noexist", null)), ImmutableList.of("0", "1", "2", "3", "4", "5") ); + + // in default value mode, is true/is false are basically pointless since they have the same behavior as = and <> + // "(s0 = 'a') is not true" equivalent to "s0 <> 'a'" + assertFilterMatches( + NotDimFilter.of(IsTrueDimFilter.of(new EqualityFilter("s0", ColumnType.STRING, "a", null))), + ImmutableList.of("0", "2", "3", "4") + ); + // "(s0 = 'a') is true", equivalent to "s0 = 'a'" + assertFilterMatches( + IsTrueDimFilter.of(new EqualityFilter("s0", ColumnType.STRING, "a", null)), + ImmutableList.of("1", "5") + ); + // "(s0 = 'a') is false" equivalent to "s0 <> 'a'" + assertFilterMatches( + IsFalseDimFilter.of(new EqualityFilter("s0", ColumnType.STRING, "a", null)), + ImmutableList.of("0", "2", "3", "4") + ); + // "(s0 = 'a') is not false", equivalent to "s0 = 'a'" + assertFilterMatches( + NotDimFilter.of(IsFalseDimFilter.of(new EqualityFilter("s0", ColumnType.STRING, "a", null))), + ImmutableList.of("1", "5") + ); } } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/expression/Expressions.java b/sql/src/main/java/org/apache/druid/sql/calcite/expression/Expressions.java index 438c666227e6e..7a8786e21ef57 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/expression/Expressions.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/expression/Expressions.java @@ -45,6 +45,8 @@ import org.apache.druid.query.filter.AndDimFilter; import org.apache.druid.query.filter.DimFilter; import org.apache.druid.query.filter.ExpressionDimFilter; +import org.apache.druid.query.filter.IsFalseDimFilter; +import org.apache.druid.query.filter.IsTrueDimFilter; import org.apache.druid.query.filter.NotDimFilter; import org.apache.druid.query.filter.NullFilter; import org.apache.druid.query.filter.OrDimFilter; @@ -379,22 +381,47 @@ public static DimFilter toFilter( { final SqlKind kind = expression.getKind(); - if (kind == SqlKind.IS_TRUE || kind == SqlKind.IS_NOT_FALSE) { - return toFilter( - plannerContext, - rowSignature, - virtualColumnRegistry, - Iterables.getOnlyElement(((RexCall) expression).getOperands()) - ); - } else if (kind == SqlKind.IS_FALSE || kind == SqlKind.IS_NOT_TRUE) { - return new NotDimFilter( - toFilter( + if (kind == SqlKind.IS_TRUE + || kind == SqlKind.IS_NOT_TRUE + || kind == SqlKind.IS_FALSE + || kind == SqlKind.IS_NOT_FALSE) { + if (NullHandling.useThreeValueLogic()) { + final DimFilter baseFilter = toFilter( + plannerContext, + rowSignature, + virtualColumnRegistry, + Iterables.getOnlyElement(((RexCall) expression).getOperands()) + ); + + if (kind == SqlKind.IS_TRUE) { + return IsTrueDimFilter.of(baseFilter); + } else if (kind == SqlKind.IS_NOT_TRUE) { + return NotDimFilter.of(IsTrueDimFilter.of(baseFilter)); + } else if (kind == SqlKind.IS_FALSE) { + return IsFalseDimFilter.of(baseFilter); + } else { // SqlKind.IS_NOT_FALSE + return NotDimFilter.of(IsFalseDimFilter.of(baseFilter)); + } + } else { + // legacy behavior + if (kind == SqlKind.IS_TRUE || kind == SqlKind.IS_NOT_FALSE) { + return toFilter( plannerContext, rowSignature, virtualColumnRegistry, Iterables.getOnlyElement(((RexCall) expression).getOperands()) - ) - ); + ); + } else { // SqlKind.IS_FALSE || SqlKind.IS_NOT_TRUE + return new NotDimFilter( + toFilter( + plannerContext, + rowSignature, + virtualColumnRegistry, + Iterables.getOnlyElement(((RexCall) expression).getOperands()) + ) + ); + } + } } else if (kind == SqlKind.CAST && expression.getType().getSqlTypeName() == SqlTypeName.BOOLEAN) { // Calcite sometimes leaves errant, useless cast-to-booleans inside filters. Strip them and continue. return toFilter( @@ -403,9 +430,7 @@ public static DimFilter toFilter( virtualColumnRegistry, Iterables.getOnlyElement(((RexCall) expression).getOperands()) ); - } else if (kind == SqlKind.AND - || kind == SqlKind.OR - || kind == SqlKind.NOT) { + } else if (kind == SqlKind.AND || kind == SqlKind.OR || kind == SqlKind.NOT) { final List filters = new ArrayList<>(); for (final RexNode rexNode : ((RexCall) expression).getOperands()) { final DimFilter nextFilter = toFilter( @@ -424,8 +449,7 @@ public static DimFilter toFilter( return new AndDimFilter(filters); } else if (kind == SqlKind.OR) { return new OrDimFilter(filters); - } else { - assert kind == SqlKind.NOT; + } else { // SqlKind.NOT return new NotDimFilter(Iterables.getOnlyElement(filters)); } } else { @@ -488,6 +512,11 @@ private static DimFilter toSimpleLeafFilter( final SqlKind kind = rexNode.getKind(); if (kind == SqlKind.IS_TRUE || kind == SqlKind.IS_NOT_FALSE) { + if (NullHandling.useThreeValueLogic()) { + // use expression filter to get istrue or notfalse expressions for correct 3vl behavior + return toExpressionLeafFilter(plannerContext, rowSignature, rexNode); + } + // legacy behavior return toSimpleLeafFilter( plannerContext, rowSignature, @@ -495,6 +524,11 @@ private static DimFilter toSimpleLeafFilter( Iterables.getOnlyElement(((RexCall) rexNode).getOperands()) ); } else if (kind == SqlKind.IS_FALSE || kind == SqlKind.IS_NOT_TRUE) { + if (NullHandling.useThreeValueLogic()) { + // use expression filter to get isfalse or nottrue expressions for correct 3vl behavior + return toExpressionLeafFilter(plannerContext, rowSignature, rexNode); + } + // legacy behavior return new NotDimFilter( toSimpleLeafFilter( plannerContext, diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/filtration/BottomUpTransform.java b/sql/src/main/java/org/apache/druid/sql/calcite/filtration/BottomUpTransform.java index f594878b29475..a0d28f372e316 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/filtration/BottomUpTransform.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/filtration/BottomUpTransform.java @@ -23,6 +23,8 @@ import com.google.common.base.Preconditions; import org.apache.druid.query.filter.AndDimFilter; import org.apache.druid.query.filter.DimFilter; +import org.apache.druid.query.filter.IsFalseDimFilter; +import org.apache.druid.query.filter.IsTrueDimFilter; import org.apache.druid.query.filter.NotDimFilter; import org.apache.druid.query.filter.OrDimFilter; @@ -89,6 +91,22 @@ private DimFilter apply0(final DimFilter filter) } else { return checkedProcess(filter); } + } else if (filter instanceof IsTrueDimFilter) { + final DimFilter oldFilter = ((IsTrueDimFilter) filter).getField(); + final DimFilter newFilter = apply0(oldFilter); + if (!oldFilter.equals(newFilter)) { + return checkedProcess(new IsTrueDimFilter(newFilter)); + } else { + return checkedProcess(filter); + } + } else if (filter instanceof IsFalseDimFilter) { + final DimFilter oldFilter = ((IsFalseDimFilter) filter).getField(); + final DimFilter newFilter = apply0(oldFilter); + if (!oldFilter.equals(newFilter)) { + return checkedProcess(new IsFalseDimFilter(newFilter)); + } else { + return checkedProcess(filter); + } } else { return checkedProcess(filter); } diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/filtration/Filtration.java b/sql/src/main/java/org/apache/druid/sql/calcite/filtration/Filtration.java index df03ff9662bef..95466635a1142 100644 --- a/sql/src/main/java/org/apache/druid/sql/calcite/filtration/Filtration.java +++ b/sql/src/main/java/org/apache/druid/sql/calcite/filtration/Filtration.java @@ -114,6 +114,7 @@ public Filtration optimize(final RowSignature rowSignature) MoveTimeFiltersToIntervals.instance(), ConvertBoundsToSelectors.create(rowSignature), ConvertSelectorsToIns.create(rowSignature), + RemoveRedundantIsTrue.instance(), MoveMarkerFiltersToIntervals.instance(), ValidateNoMarkerFiltersRemain.instance() ) @@ -136,7 +137,8 @@ public Filtration optimizeFilterOnly(final RowSignature rowSignature) ImmutableList.of( CombineAndSimplifyBounds.instance(), ConvertBoundsToSelectors.create(rowSignature), - ConvertSelectorsToIns.create(rowSignature) + ConvertSelectorsToIns.create(rowSignature), + RemoveRedundantIsTrue.instance() ) ); diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/filtration/RemoveRedundantIsTrue.java b/sql/src/main/java/org/apache/druid/sql/calcite/filtration/RemoveRedundantIsTrue.java new file mode 100644 index 0000000000000..d21b22ef4f960 --- /dev/null +++ b/sql/src/main/java/org/apache/druid/sql/calcite/filtration/RemoveRedundantIsTrue.java @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.sql.calcite.filtration; + +import com.google.common.base.Function; +import org.apache.druid.query.filter.AndDimFilter; +import org.apache.druid.query.filter.DimFilter; +import org.apache.druid.query.filter.IsTrueDimFilter; +import org.apache.druid.query.filter.OrDimFilter; + +import java.util.ArrayList; +import java.util.List; + +/** + * Similar to {@link BottomUpTransform} except only removes redundant IS TRUE filters that are not inside of a NOT + * filter. The planner leaves behind stuff like `(x == y) IS TRUE` which is a pointless delegate when not living inside + * of a not filter to enforce proper three-value logic + */ +public class RemoveRedundantIsTrue implements Function +{ + private static final RemoveRedundantIsTrue INSTANCE = new RemoveRedundantIsTrue(); + + public static RemoveRedundantIsTrue instance() + { + return INSTANCE; + } + + @Override + public Filtration apply(Filtration filtration) + { + if (filtration.getDimFilter() != null) { + final Filtration retVal = Filtration.create(apply0(filtration.getDimFilter()), filtration.getIntervals()); + return filtration.equals(retVal) ? retVal : apply(retVal); + } else { + return filtration; + } + } + + private DimFilter apply0(final DimFilter filter) + { + // check for AND, OR to process their children, and unwrap any IS TRUE not living under a NOT, anything else we + // leave alone + if (filter instanceof AndDimFilter) { + final List oldFilters = ((AndDimFilter) filter).getFields(); + final List newFilters = new ArrayList<>(); + for (DimFilter oldFilter : oldFilters) { + final DimFilter newFilter = apply0(oldFilter); + if (newFilter != null) { + newFilters.add(newFilter); + } + } + if (!newFilters.equals(oldFilters)) { + return new AndDimFilter(newFilters); + } else { + return filter; + } + } else if (filter instanceof OrDimFilter) { + final List oldFilters = ((OrDimFilter) filter).getFields(); + final List newFilters = new ArrayList<>(); + for (DimFilter oldFilter : oldFilters) { + final DimFilter newFilter = apply0(oldFilter); + if (newFilter != null) { + newFilters.add(newFilter); + } + } + if (!newFilters.equals(oldFilters)) { + return new OrDimFilter(newFilters); + } else { + return filter; + } + } else if (filter instanceof IsTrueDimFilter) { + final DimFilter oldFilter = ((IsTrueDimFilter) filter).getField(); + final DimFilter newFilter = apply0(oldFilter); + if (!oldFilter.equals(newFilter)) { + return newFilter; + } else { + return oldFilter; + } + } else { + return filter; + } + } +} diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/BaseCalciteQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/BaseCalciteQueryTest.java index c6f41aeb2efbc..1527e75fa3853 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/BaseCalciteQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/BaseCalciteQueryTest.java @@ -59,6 +59,7 @@ import org.apache.druid.query.filter.EqualityFilter; import org.apache.druid.query.filter.ExpressionDimFilter; import org.apache.druid.query.filter.InDimFilter; +import org.apache.druid.query.filter.IsTrueDimFilter; import org.apache.druid.query.filter.NotDimFilter; import org.apache.druid.query.filter.NullFilter; import org.apache.druid.query.filter.OrDimFilter; @@ -365,6 +366,11 @@ public static NotDimFilter not(DimFilter filter) return new NotDimFilter(filter); } + public static IsTrueDimFilter istrue(DimFilter filter) + { + return new IsTrueDimFilter(filter); + } + public static InDimFilter in(String dimension, Collection values, ExtractionFn extractionFn) { return new InDimFilter(dimension, values, extractionFn); diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java index 086633d7e59c6..3a5da7d325ff8 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java @@ -6470,7 +6470,9 @@ public void testUnnestVirtualWithColumnsAndNullIf() expressionFilter("(\"j0.unnest\" == \"m2\")"), and( isNull("j0.unnest"), - not(expressionFilter("(\"j0.unnest\" == \"m2\")")) + NullHandling.sqlCompatible() + ? not(istrue(expressionFilter("(\"j0.unnest\" == \"m2\")"))) + : not(expressionFilter("(\"j0.unnest\" == \"m2\")")) ) )) .legacy(false) diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteJoinQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteJoinQueryTest.java index 908ccae687b2d..0882f3c9cb12e 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteJoinQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteJoinQueryTest.java @@ -5607,7 +5607,14 @@ public void testRegressionFilteredAggregatorsSubqueryJoins(Map q or( isNull("__j0.a0"), not( - or( + NullHandling.sqlCompatible() + ? istrue( + or( + not(expressionFilter("\"__j0.d0\"")), + notNull("__j0.d0") + ) + ) + : or( not(expressionFilter("\"__j0.d0\"")), notNull("__j0.d0") ) diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java index 08fdbc3bfaa0f..49b66f8bc9f9e 100644 --- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java +++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java @@ -3101,7 +3101,9 @@ public void testNullEmptyStringEquality() equality("dim2", "a", ColumnType.STRING), and( isNull("dim2"), - not(equality("dim2", "a", ColumnType.STRING)) + NullHandling.sqlCompatible() + ? not(istrue(equality("dim2", "a", ColumnType.STRING))) + : not(selector("dim2", "a")) ) ) ) @@ -3109,12 +3111,12 @@ public void testNullEmptyStringEquality() .context(QUERY_CONTEXT_DEFAULT) .build() ), - ImmutableList.of( NullHandling.replaceWithDefault() // Matches everything but "abc" ? new Object[]{5L} - : new Object[]{2L} + // match only null values + : new Object[]{4L} ) ); } @@ -4847,13 +4849,15 @@ public void testFilteredAggregations() ), new FilteredAggregatorFactory( new LongSumAggregatorFactory("a1", "cnt"), - not(equality("dim1", "abc", ColumnType.STRING)) + NullHandling.sqlCompatible() + ? not(istrue(equality("dim1", "abc", ColumnType.STRING))) + : not(selector("dim1", "abc")) ), new FilteredAggregatorFactory( new LongSumAggregatorFactory("a2", "cnt"), - NullHandling.replaceWithDefault() - ? selector("dim1", "a", new SubstringDimExtractionFn(0, 1)) - : expressionFilter("(substring(\"dim1\", 0, 1) == 'a')") + NullHandling.sqlCompatible() + ? expressionFilter("(substring(\"dim1\", 0, 1) == 'a')") + : selector("dim1", "a", new SubstringDimExtractionFn(0, 1)) ), new FilteredAggregatorFactory(